From 0ab7560477d54d99ebb418b6157d1126e8b9cde1 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 21 Jul 2016 13:44:22 +0200 Subject: [PATCH 01/49] Added top_progression chart for tld, domain and scheme + Small modification in config file. --- bin/WebStats.py | 132 ++++++++++-------- bin/packages/config.cfg.sample | 4 +- bin/packages/lib_words.py | 12 +- var/www/Flask_server.py | 53 +++++++ var/www/templates/Trending.html | 44 +++++- var/www/templates/index.html | 4 +- .../trending_graphs/Domainstrending.html | 39 ++++++ .../trending_graphs/Protocolstrending.html | 39 ++++++ .../trending_graphs/Tldstrending.html | 39 ++++++ 9 files changed, 293 insertions(+), 73 deletions(-) diff --git a/bin/WebStats.py b/bin/WebStats.py index 71bbd944..2bb94fc3 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -17,13 +17,12 @@ from Helper import Process from pyfaup.faup import Faup # Config Var -threshold_need_to_look = 50 -range_to_look = 10 -threshold_to_plot = 1 #500% -to_plot = set() -clean_frequency = 10 #minutes +threshold_total_sum = 200 # Above this value, a keyword is eligible for a progression +threshold_increase = 1.0 # The percentage representing the keyword occurence since num_day_to_look +max_set_cardinality = 10 # The cardinality of the progression set +num_day_to_look = 5 # the detection of the progression start num_day_to_look in the past -def analyse(server, field_name): +def analyse(server, field_name, date, url_parsed): field = url_parsed[field_name] if field is not None: prev_score = server.hget(field, date) @@ -31,51 +30,57 @@ def analyse(server, field_name): server.hset(field, date, int(prev_score) + 1) else: server.hset(field, date, 1) + if field_name == "domain": #save domain in a set for the monthly plot + domain_set_name = "domain_set_" + date[0:6] + server.sadd(domain_set_name, field) + print "added in " + domain_set_name +": "+ field -def analyse_and_progression(server, field_name): - field = url_parsed[field_name] - if field is not None: - prev_score = server.hget(field, date) - if prev_score is not None: - print field + ' prev_score:' + prev_score - server.hset(field, date, int(prev_score) + 1) - if int(prev_score) + 1 > threshold_need_to_look: #threshold for false possitive - if(check_for_progression(server, field, date)): - to_plot.add(field) - else: - server.hset(field, date, 1) - -def check_for_progression(server, field, date): - previous_data = set() - tot_sum = 0 - for i in range(0, range_to_look): - curr_value = server.hget(field, Date(date).substract_day(i)) - if curr_value is None: #no further data - break - else: - curr_value = int(curr_value) - previous_data.add(curr_value) - tot_sum += curr_value - if i == 0: - today_val = curr_value - - - print 'totsum='+str(tot_sum) - print 'div='+str(tot_sum/today_val) - if tot_sum/today_val >= threshold_to_plot: - return True - else: - return False - -def clean_to_plot(): - temp_to_plot = set() +def get_date_range(num_day): curr_date = datetime.date.today() - date = Date(str(curr_date.year)+str(curr_date.month)+str(curr_date.day)) + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + +def compute_progression(server, field_name, num_day, url_parsed): + redis_progression_name = 'top_progression_'+field_name + redis_progression_name_set = 'top_progression_'+field_name+'_set' + + keyword = url_parsed[field_name] + if keyword is not None: + date_range = get_date_range(num_day) + # check if this keyword is eligible for progression + keyword_total_sum = 0 + value_list = [] + for date in date_range: + curr_value = server.hget(keyword, date) + value_list.append(int(curr_value if curr_value is not None else 0)) + keyword_total_sum += int(curr_value) if curr_value is not None else 0 + oldest_value = value_list[-1] if value_list[-1] != 0 else 1 #Avoid zero division + keyword_increase = value_list[0] / oldest_value + + # filter + if (keyword_total_sum > threshold_total_sum) and (keyword_increase > threshold_increase): + + if server.sismember(redis_progression_name_set, keyword): #if keyword is in the set + server.hset(redis_progression_name, keyword, keyword_increase) #update its value + + elif (server.scard(redis_progression_name_set) < max_set_cardinality): + server.sadd(redis_progression_name_set, keyword) + + else: #not in the set + #Check value for all members + member_set = [] + for keyw in server.smembers(redis_progression_name_set): + member_set += (keyw, int(server.hget(redis_progression_name, keyw))) + member_set.sort(key=lambda tup: tup[1]) + if member_set[0] < keyword_increase: + #remove min from set and add the new one + server.srem(redis_progression_name_set, member_set[0]) + server.sadd(redis_progression_name_set, keyword) - for elem in to_plot: - if(check_for_progression(field, date)): - temp_to_plot.add(elem) - to_plot = temp_to_plot if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) @@ -99,10 +104,10 @@ if __name__ == '__main__': port=p.config.get("Redis_Level_DB", "port"), db=p.config.get("Redis_Level_DB", "db")) - r_serv2 = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_Domain", "host"), - port=p.config.get("Redis_Level_DB_Domain", "port"), - db=p.config.get("Redis_Level_DB_Domain", "db")) + r_serv_trend = redis.StrictRedis( + host=p.config.get("Redis_Level_DB_Trending", "host"), + port=p.config.get("Redis_Level_DB_Trending", "port"), + db=p.config.get("Redis_Level_DB_Trending", "db")) # FILE CURVE SECTION # csv_path_proto = os.path.join(os.environ['AIL_HOME'], @@ -129,27 +134,29 @@ if __name__ == '__main__': if message is None: if generate_new_graph: generate_new_graph = False - print 'Building graph' today = datetime.date.today() year = today.year month = today.month - lib_words.create_curve_with_word_file(r_serv1, csv_path_proto, + print 'Building protocol graph' + lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto, protocolsfile_path, year, month) - lib_words.create_curve_with_word_file(r_serv1, csv_path_tld, + print 'Building tld graph' + lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld, tldsfile_path, year, month) - lib_words.create_curve_with_list(r_serv2, csv_path_domain, - to_plot, year, + print 'Building domain graph' + lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain, + "domain", year, month) print 'end building' publisher.debug("{} queue is empty, waiting".format(config_section)) print 'sleeping' - time.sleep(5) + time.sleep(5*60) continue else: @@ -159,6 +166,9 @@ if __name__ == '__main__': faup.decode(url) url_parsed = faup.get() - analyse(r_serv1, 'scheme') #Scheme analysis - analyse(r_serv1, 'tld') #Tld analysis - analyse_and_progression(r_serv2, 'domain') #Domain analysis + analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis + analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis + analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis + compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed) + compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed) + compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 853edd2a..31d9b147 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -49,10 +49,10 @@ host = localhost port = 2013 db = 0 -[Redis_Level_DB_Domain] +[Redis_Level_DB_Trending] host = localhost port = 2016 -db = 3 +db = 0 [Redis_Level_DB_Hashs] host = localhost diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py index b2cf418b..e98609d7 100644 --- a/bin/packages/lib_words.py +++ b/bin/packages/lib_words.py @@ -88,7 +88,7 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month with open(feederfilename, 'rb') as f: # words of the files - words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' ]) + words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ]) headers = ['Date'] + words with open(csvfilename+'.csv', 'wb') as f: @@ -112,7 +112,7 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month row.append(value) writer.writerow(row) -def create_curve_with_list(server, csvfilename, to_plot, year, month): +def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month): """Create a csv file used with dygraph. :param r_serv: -- connexion to redis database @@ -122,15 +122,17 @@ def create_curve_with_list(server, csvfilename, to_plot, year, month): :param month: -- (integer) The month to process This function create a .csv file using datas in redis. - It's checking if the words contained in to_plot and + It's checking if the words contained in set_to_plot and their respectives values by days exists. """ first_day = date(year, month, 01) last_day = date(year, month, calendar.monthrange(year, month)[1]) - words = sorted(to_plot) - + + redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2) + words = list(server.smembers(redis_set_name)) + headers = ['Date'] + words with open(csvfilename+'.csv', 'wb') as f: writer = csv.writer(f) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 018608f1..d6913999 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -4,12 +4,14 @@ import redis import ConfigParser import json +import datetime from flask import Flask, render_template, jsonify, request import flask import os import sys sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Paste +from Date import Date # CONFIG # configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') @@ -35,6 +37,11 @@ r_serv_log = redis.StrictRedis( port=cfg.getint("Redis_Log", "port"), db=cfg.getint("Redis_Log", "db")) +r_serv_charts = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_Trending", "host"), + port=cfg.getint("Redis_Level_DB_Trending", "port"), + db=cfg.getint("Redis_Level_DB_Trending", "db")) + app = Flask(__name__, static_url_path='/static/') @@ -76,6 +83,20 @@ def showpaste(content_range): return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content)) +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + + + + + +# ============ ROUTES ============ @app.route("/_logs") def logs(): @@ -86,6 +107,38 @@ def logs(): def stuff(): return jsonify(row1=get_queues(r_serv)) +@app.route("/_progressionCharts", methods=['GET']) +def progressionCharts(): + #To be used later + attribute_name = request.args.get('attributeName') + trending_name = request.args.get('trendingName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(attribute_name, date) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + return jsonify(bar_values) + + else: + redis_progression_name = 'top_progression_'+trending_name + redis_progression_name_set = 'top_progression_'+trending_name+'_set' + + member_set = [] + for keyw in r_serv_charts.smembers(redis_progression_name_set): + keyw_value = r_serv_charts.hget(redis_progression_name, keyw) + keyw_value = keyw_value if keyw_value is not None else 0 + member_set.append((keyw, int(keyw_value))) + member_set.sort(key=lambda tup: tup[1], reverse=True) + if len(member_set) == 0: + member_set.append(("No relevant data", int(100))) + return jsonify(member_set) + @app.route("/search", methods=['POST']) def search(): diff --git a/var/www/templates/Trending.html b/var/www/templates/Trending.html index 8928f948..9031076c 100644 --- a/var/www/templates/Trending.html +++ b/var/www/templates/Trending.html @@ -16,6 +16,9 @@ + + + + + +
{% include 'trending_graphs/Tldstrending.html' %} @@ -87,6 +98,24 @@ + + @@ -111,6 +147,8 @@ $(document).ready(function(){ // Create the graph when the page has just loaded create_and_plot("TldTrending", '../static//csv/tldstrendingdata.csv') + //Top progression chart + refresh_top_chart("tld", true); }); // Used when we modify the number of displayed curves diff --git a/var/www/templates/index.html b/var/www/templates/index.html index eb34aaf0..1ceac491 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -14,9 +14,10 @@ - + + - diff --git a/var/www/templates/trending_graphs/Domainstrending.html b/var/www/templates/trending_graphs/Domainstrending.html index cc403fd7..654a86c6 100644 --- a/var/www/templates/trending_graphs/Domainstrending.html +++ b/var/www/templates/trending_graphs/Domainstrending.html @@ -1,3 +1,42 @@ +
+
+
+
+
+
+ Top Progression for the last 5 days +
+
+
+
+
Click on a part
+
+ +
+ +
+
+ +
+
+
+ Top Progression for the last 15 days +
+
+
+
Click on a part
+
+ +
+ +
+
+
+ +
+
+ +
Top Domain Trending diff --git a/var/www/templates/trending_graphs/Protocolstrending.html b/var/www/templates/trending_graphs/Protocolstrending.html index 2f4c45af..5b4785ce 100644 --- a/var/www/templates/trending_graphs/Protocolstrending.html +++ b/var/www/templates/trending_graphs/Protocolstrending.html @@ -1,3 +1,42 @@ +
+
+
+
+
+
+ Top Progression for the last 5 days +
+
+
+
+
Click on a part
+
+ +
+ +
+
+ +
+
+
+ Top Progression for the last 15 days +
+
+
+
Click on a part
+
+ +
+ +
+
+
+ +
+
+ +
Protocols Trend diff --git a/var/www/templates/trending_graphs/Tldstrending.html b/var/www/templates/trending_graphs/Tldstrending.html index 5c8b1c1e..92837332 100644 --- a/var/www/templates/trending_graphs/Tldstrending.html +++ b/var/www/templates/trending_graphs/Tldstrending.html @@ -1,3 +1,42 @@ +
+
+
+
+
+
+ Top Progression for the last 5 days +
+
+
+
+
Click on a part
+
+ +
+ +
+
+ +
+
+
+ Top Progression for the last 15 days +
+
+
+
Click on a part
+
+ +
+ +
+
+
+ +
+
+ +
Top Level Domain Trending From ef76d7aba3194b8b7641b7627a4b7b758cb4a312 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 21 Jul 2016 13:48:44 +0200 Subject: [PATCH 02/49] re-ordered tab in trending charts --- var/www/templates/Trending.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/var/www/templates/Trending.html b/var/www/templates/Trending.html index 9031076c..b03ec301 100644 --- a/var/www/templates/Trending.html +++ b/var/www/templates/Trending.html @@ -64,8 +64,8 @@
@@ -84,12 +84,12 @@ $SCRIPT_ROOT = {{ request.script_root|tojson|safe }};
{% include 'trending_graphs/Domainstrending.html' %}
-
- {% include 'trending_graphs/Wordstrending.html' %} -
{% include 'trending_graphs/Protocolstrending.html' %}
+
+ {% include 'trending_graphs/Wordstrending.html' %} +
From b076765cf4f8255b10ca300b583920d45c1d23cc Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 21 Jul 2016 16:35:49 +0200 Subject: [PATCH 03/49] Added dependency flot.time --- var/www/update_thirdparty.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh index 97a976e5..201c2b4f 100755 --- a/var/www/update_thirdparty.sh +++ b/var/www/update_thirdparty.sh @@ -36,6 +36,7 @@ wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTabl #Ressource for graph wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.js -O ./static/js/jquery.flot.js wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js +wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.time.js -O ./static/js/jquery.flot.time.js mkdir -p ./static/image pushd static/image From dad05073fd41d40a9cffd97f29a125e46b7e151e Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 22 Jul 2016 09:25:05 +0200 Subject: [PATCH 04/49] Fixed bug introduced in merge-conflict --- bin/WebStats.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bin/WebStats.py b/bin/WebStats.py index eba2c0c4..30912df5 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -20,15 +20,12 @@ threshold_increase = 1.0 # The percentage representing the keyword occurence si max_set_cardinality = 10 # The cardinality of the progression set num_day_to_look = 5 # the detection of the progression start num_day_to_look in the past +def analyse(server, field_name, date, url_parsed): field = url_parsed[field_name] if field is not None: prev_score = server.hget(field, date) if prev_score is not None: - print field + ' prev_score:' + prev_score server.hset(field, date, int(prev_score) + 1) - if int(prev_score) + 1 > threshold_need_to_look: # threshold for false possitive - if(check_for_progression(server, field, date)): - to_plot.add(field) else: server.hset(field, date, 1) if field_name == "domain": #save domain in a set for the monthly plot From 06be1f129a5580fc2bac485a51567957b241d256 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 25 Jul 2016 16:38:57 +0200 Subject: [PATCH 05/49] Added Draft of ModuleStats + Paste size average --- bin/Credential.py | 23 ++- bin/CreditCard.py | 2 +- bin/Global.py | 2 +- bin/LAUNCH.sh | 2 + bin/Mail.py | 8 +- bin/ModuleStats.py | 168 ++++++++++++++++++ bin/packages/modules.cfg | 13 +- var/www/Flask_server.py | 71 ++++++++ var/www/templates/Moduletrending.html | 145 +++++++++++++++ var/www/templates/Trending.html | 1 + .../trending_graphs/Moduletrending.html | 71 ++++++++ 11 files changed, 497 insertions(+), 9 deletions(-) create mode 100755 bin/ModuleStats.py create mode 100644 var/www/templates/Moduletrending.html create mode 100644 var/www/templates/trending_graphs/Moduletrending.html diff --git a/bin/Credential.py b/bin/Credential.py index d81c9ff6..84fe49ee 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -17,6 +17,7 @@ if __name__ == "__main__": regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" + regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" while True: message = p.get_from_set() if message is None: @@ -37,7 +38,12 @@ if __name__ == "__main__": if len(creds) == 0: continue + sites_for_stats = [] + for elem in re.findall(regex_site_for_stats, content): + sites.append(elem[1:-1]) + sites = set(re.findall(regex_web, content)) + sites_for_stats = set(sites_for_stats) message = 'Checked {} credentials found.'.format(len(creds)) if sites: @@ -51,7 +57,22 @@ if __name__ == "__main__": print("========> Found more than 10 credentials in this file : {}".format(filepath)) publisher.warning(to_print) #Send to duplicate - p.populate_set_out(filepath) + p.populate_set_out(filepath, 'Duplicate') + + #Put in form, then send to moduleStats + creds_sites = {} + for cred in creds: + user_and_site, password = cred.split(':') + site = user_web.split('@')[1] + if site in sites: # if the parsing went fine + if site in creds_sites.keys(): # check if the key already exists + creds_sites[site] = creds_sites[web]+1 + else: + creds_sites[site] = 1 + for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats + print 'Credential;{};{};{}'.format(num, site, paste.p_date) + #p.populate_set_out('Credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') + if sites: print("=======> Probably on : {}".format(', '.join(sites))) else: diff --git a/bin/CreditCard.py b/bin/CreditCard.py index 18703f4e..de90f4d4 100755 --- a/bin/CreditCard.py +++ b/bin/CreditCard.py @@ -66,7 +66,7 @@ if __name__ == "__main__": publisher.warning('{}Checked {} valid number(s)'.format( to_print, len(creditcard_set))) #Send to duplicate - p.populate_set_out(filename) + p.populate_set_out(filepath, 'Redis_Duplicate') else: publisher.info('{}CreditCard related'.format(to_print)) else: diff --git a/bin/Global.py b/bin/Global.py index 8b6e482f..9cacbc88 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -52,7 +52,7 @@ if __name__ == '__main__': else: # TODO Store the name of the empty paste inside a Redis-list. print "Empty Paste: not processed" - publisher.debug("Empty Paste: {0} not processed".format(paste)) + publisher.debug("Empty Paste: {0} not processed".format(message)) continue else: print "Empty Queues: Waiting..." diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index d6706e1e..5afc3c2a 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -140,6 +140,8 @@ function launching_scripts { screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' sleep 0.1 screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' } #If no params, display the help diff --git a/bin/Mail.py b/bin/Mail.py index 964deb19..d3968442 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -61,9 +61,15 @@ if __name__ == "__main__": if MX_values[0] > is_critical: publisher.warning(to_print) #Send to duplicate - p.populate_set_out(filename) + p.populate_set_out(filename, 'Duplicate') + else: publisher.info(to_print) + #Send to ModuleStats + for mail in MX_values[1]: + print 'mail;{};{};{}'.format(1, mail, PST.p_date) + p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') + prec_filename = filename else: diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py new file mode 100755 index 00000000..705d84e7 --- /dev/null +++ b/bin/ModuleStats.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* +""" + Template for new modules +""" + +import time +import datetime +import redis +import os +from packages import lib_words +from packages.Date import Date +from pubsublogger import publisher +from Helper import Process +from packages import Paste + +# Config Var +max_set_cardinality = 7 +num_day_to_look = 5 + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + + +def compute_most_posted(server, message, num_day): + module, num, keyword, paste_date = message.split(';') + + redis_progression_name_set = 'top_'+ module +'_set' + + # Add/Update in Redis + prev_score = server.hget(paste_date, module+'-'+keyword) + if prev_score is not None: + server.hset(paste_date, module+'-'+keyword, int(prev_score) + int(num)) + else: + server.hset(paste_date, module+'-'+keyword, int(num)) + + # Compute Most Posted + date_range = get_date_range(num_day) + # check if this keyword is eligible for progression + keyword_total_sum = 0 + for date in date_range: + curr_value = server.hget(date, module+'-'+keyword) + keyword_total_sum += int(curr_value) if curr_value is not None else 0 + + if (server.scard(redis_progression_name_set) < max_set_cardinality): + server.sadd(redis_progression_name_set, keyword) + + else: #not in the set + #Check value for all members + member_set = [] + for keyw in server.smembers(redis_progression_name_set): + member_set.append((keyw, int(server.hget(paste_date, module+'-'+keyw)))) + member_set.sort(key=lambda tup: tup[1]) + if member_set[0][1] < keyword_total_sum: + #remove min from set and add the new one + print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + server.srem(redis_progression_name_set, member_set[0][0]) + server.sadd(redis_progression_name_set, keyword) + + +def compute_provider_size(server, path, num_day_to_look): + + redis_progression_name_set = 'top_size_set' + paste = Paste.Paste(path) + + paste_size = paste._get_p_size() + paste_provider = paste.p_source + paste_date = paste._get_p_date() + new_avg = paste_size + + # Add/Update in Redis + prev_num_paste = server.hget(paste_provider+'_num', paste_date) + if prev_num_paste is not None: + server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1) + prev_sum_size = server.hget(paste_provider+'_size', paste_date) + + if prev_sum_size is not None: + server.hset(paste_provider+'_size', paste_date, paste_size) + new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1) + else: + server.hset(paste_provider+'_size', paste_date, paste_size) + + else: + server.hset(paste_provider+'_num', paste_date, 1) + + # Compute Most Posted + #date_range = get_date_range(num_day_to_look) + # check if this keyword is eligible for progression + provider_total_sum = 0 + #for date in date_range: + # curr_value = server.hget(paste_provider+'_size', date) + # provider_total_sum += int(curr_value) if curr_value is not None else 0 + + #if paste_provider in server.smembers(redis_progression_name_set): # if it is already in the set + # return + + if (server.scard(redis_progression_name_set) < max_set_cardinality): + server.sadd(redis_progression_name_set, paste_provider) + + else: #not in the set + #Check value for all members + member_set = [] + for provider in server.smembers(redis_progression_name_set): + curr_avg = 0.0 + # for date in date_range: + curr_size = server.hget(provider+'_size', paste_date) + curr_num = server.hget(provider+'_num', paste_date) + print curr_size + if (curr_size is not None) and (curr_num is not None): + curr_avg += float(curr_size) / float(curr_num) + print str(curr_avg) + member_set.append((provider, curr_avg)) + member_set.sort(key=lambda tup: tup[1]) + if member_set[0][1] < new_avg: + #remove min from set and add the new one + print 'Adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + server.srem(redis_progression_name_set, member_set[0][0]) + server.srem(redis_progression_name_set, member_set[0][0]) + server.sadd(redis_progression_name_set, paste_provider) + + + +if __name__ == '__main__': + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. + publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + config_section = 'ModuleStats' + + # Setup the I/O queues + p = Process(config_section) + + # Sent to the logging a description of the module + publisher.info("Makes statistics about valid URL") + + # REDIS # + r_serv_trend = redis.StrictRedis( + host=p.config.get("Redis_Level_DB_Trending", "host"), + port=p.config.get("Redis_Level_DB_Trending", "port"), + db=p.config.get("Redis_Level_DB_Trending", "db")) + + # Endless loop getting messages from the input queue + while True: + # Get one message from the input queue + message = p.get_from_set() + + if message is None: + publisher.debug("{} queue is empty, waiting".format(config_section)) + print 'sleeping' + time.sleep(2) + continue + + else: + # Do something with the message from the queue + print message.split(';') + if len(message.split(';')) > 1: + compute_most_posted(r_serv_trend, message, num_day_to_look) + else: + compute_provider_size(r_serv_trend, message, num_day_to_look) diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 5f087427..5f8a7f31 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -1,6 +1,6 @@ [Global] subscribe = ZMQ_Global -publish = Redis_Global +publish = Redis_Global,Redis_ModuleStats [Duplicates] subscribe = Redis_Duplicate @@ -30,12 +30,12 @@ subscribe = Redis_Global publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve [CreditCards] -subscribe = Redis_CreditCards -publish = Redis_Duplicate +subscribe = Redis_CreditCard +publish = Redis_Duplicate,Redis_ModuleStats [Mail] subscribe = Redis_Mail -publish = Redis_Duplicate +publish = Redis_Duplicate,Redis_ModuleStats [Onion] subscribe = Redis_Onion @@ -52,12 +52,15 @@ publish = Redis_Url,ZMQ_Url [WebStats] subscribe = Redis_Url +[ModuleStats] +subscribe = Redis_ModuleStats + [Release] subscribe = Redis_Global [Credential] subscribe = Redis_Credential -publish = Redis_Duplicate +publish = Redis_Duplicate,Redis_ModuleStats [Cve] subscribe = Redis_Cve diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 4bdee047..00ceaf14 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -163,6 +163,72 @@ def progressionCharts(): member_set.append(("No relevant data", int(100))) return jsonify(member_set) +@app.route("/_moduleCharts", methods=['GET']) +def modulesCharts(): + #To be used later + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(date, module_name+'-'+keyword_name) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + return jsonify(bar_values) + + else: + redis_progression_name_set = 'top_'+ module_name +'_set' + + member_set = [] + for keyw in r_serv_charts.smembers(redis_progression_name_set): + redis_progression_name = module_name+'-'+keyw + keyw_value = r_serv_charts.hget(get_date_range(0)[0] ,redis_progression_name) + keyw_value = keyw_value if keyw_value is not None else 0 + member_set.append((keyw, int(keyw_value))) + member_set.sort(key=lambda tup: tup[1], reverse=True) + if len(member_set) == 0: + member_set.append(("No relevant data", int(100))) + return jsonify(member_set) + + +@app.route("/_sizeCharts", methods=['GET']) +def sizeCharts(): + #To be used later + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(keyword_name+'_size', date) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], float(curr_value if curr_value is not None else 0)]) + return jsonify(bar_values) + + else: + redis_progression_name_set = 'top_size_set' + + member_set = [] + for keyw in r_serv_charts.smembers(redis_progression_name_set): + redis_progression_name = keyw+'_size' + keyw_value = r_serv_charts.hget(redis_progression_name, get_date_range(0)[0]) + keyw_value = keyw_value if keyw_value is not None else 0 + member_set.append((keyw, float(keyw_value))) + member_set.sort(key=lambda tup: tup[1], reverse=True) + if len(member_set) == 0: + member_set.append(("No relevant data", float(100))) + return jsonify(member_set) + + @app.route("/search", methods=['POST']) def search(): @@ -227,6 +293,11 @@ def trending(): return render_template("Trending.html", default_display = default_display) +@app.route("/moduletrending/") +def moduletrending(): + return render_template("Moduletrending.html") + + @app.route("/showsavedpaste/") #completely shows the paste in a new tab def showsavedpaste(): return showpaste(0) diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html new file mode 100644 index 00000000..1240f5ef --- /dev/null +++ b/var/www/templates/Moduletrending.html @@ -0,0 +1,145 @@ + + + + + + + + Analysis Information Leak framework Dashboard + + + + + + + + + + + + + + + + +
+ +
+
+ {% include 'trending_graphs/Moduletrending.html' %} +
+ + + + + + +
+ + + + diff --git a/var/www/templates/Trending.html b/var/www/templates/Trending.html index 7c46cacc..177e5cc6 100644 --- a/var/www/templates/Trending.html +++ b/var/www/templates/Trending.html @@ -32,6 +32,7 @@
diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html new file mode 100644 index 00000000..dfe78ba4 --- /dev/null +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -0,0 +1,71 @@ +
+
+
+
+
+
+ Credential - Most posted domain +
+
+
+
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+
+
+
+ Mail - Most posted domain (max 1 per paste) +
+
+
+
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+
+
+
+ Paste size - by paste provider +
+
+
+
+
+
+ +
+ +
+
+ +
+ +
+
From cc1b49baafc7d3ec26c1ad60dc41dea301607512 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 26 Jul 2016 10:45:02 +0200 Subject: [PATCH 06/49] Fixed variable bug in moduleStats and added draft of credential stats --- bin/Credential.py | 42 +++++++++++++-------------- bin/ModuleStats.py | 10 +++---- var/www/templates/Moduletrending.html | 10 +++---- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/bin/Credential.py b/bin/Credential.py index 84fe49ee..29a418c7 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -1,10 +1,12 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* import time +import sys from packages import Paste from pubsublogger import publisher from Helper import Process import re +from pyfaup.faup import Faup if __name__ == "__main__": publisher.port = 6380 @@ -38,16 +40,12 @@ if __name__ == "__main__": if len(creds) == 0: continue - sites_for_stats = [] - for elem in re.findall(regex_site_for_stats, content): - sites.append(elem[1:-1]) - - sites = set(re.findall(regex_web, content)) - sites_for_stats = set(sites_for_stats) + sites= re.findall(regex_web, content) #Use to count occurences + sites_set = set(re.findall(regex_web, content)) message = 'Checked {} credentials found.'.format(len(creds)) - if sites: - message += ' Related websites: {}'.format(', '.join(sites)) + if sites_set: + message += ' Related websites: {}'.format(', '.join(sites_set)) to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message) @@ -59,21 +57,21 @@ if __name__ == "__main__": #Send to duplicate p.populate_set_out(filepath, 'Duplicate') - #Put in form, then send to moduleStats + #Put in form, count occurences, then send to moduleStats creds_sites = {} - for cred in creds: - user_and_site, password = cred.split(':') - site = user_web.split('@')[1] - if site in sites: # if the parsing went fine - if site in creds_sites.keys(): # check if the key already exists - creds_sites[site] = creds_sites[web]+1 - else: - creds_sites[site] = 1 - for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats - print 'Credential;{};{};{}'.format(num, site, paste.p_date) - #p.populate_set_out('Credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') + faup = Faup() + for url in sites: + faup.decode(url) + domain = faup.get()['domain'] + if domain in creds_sites.keys(): + creds_sites[domain] += 1 + else: + creds_sites[domain] = 1 - if sites: - print("=======> Probably on : {}".format(', '.join(sites))) + for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats + p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') + + if sites_set: + print("=======> Probably on : {}".format(', '.join(sites_set))) else: publisher.info(to_print) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 705d84e7..26ca5ce8 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -32,7 +32,6 @@ def compute_most_posted(server, message, num_day): module, num, keyword, paste_date = message.split(';') redis_progression_name_set = 'top_'+ module +'_set' - # Add/Update in Redis prev_score = server.hget(paste_date, module+'-'+keyword) if prev_score is not None: @@ -55,7 +54,9 @@ def compute_most_posted(server, message, num_day): #Check value for all members member_set = [] for keyw in server.smembers(redis_progression_name_set): - member_set.append((keyw, int(server.hget(paste_date, module+'-'+keyw)))) + keyw_value = server.hget(paste_date, module+'-'+keyw) + if keyw_value is not None: + member_set.append((keyw, int(keyw_value))) member_set.sort(key=lambda tup: tup[1]) if member_set[0][1] < keyword_total_sum: #remove min from set and add the new one @@ -111,10 +112,8 @@ def compute_provider_size(server, path, num_day_to_look): # for date in date_range: curr_size = server.hget(provider+'_size', paste_date) curr_num = server.hget(provider+'_num', paste_date) - print curr_size if (curr_size is not None) and (curr_num is not None): curr_avg += float(curr_size) / float(curr_num) - print str(curr_avg) member_set.append((provider, curr_avg)) member_set.sort(key=lambda tup: tup[1]) if member_set[0][1] < new_avg: @@ -156,12 +155,11 @@ if __name__ == '__main__': if message is None: publisher.debug("{} queue is empty, waiting".format(config_section)) print 'sleeping' - time.sleep(2) + time.sleep(20) continue else: # Do something with the message from the queue - print message.split(';') if len(message.split(';')) > 1: compute_most_posted(r_serv_trend, message, num_day_to_look) else: diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index 1240f5ef..bd4218e3 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -81,11 +81,11 @@ }; - moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts"; + var moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts"; // Graph1 $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day, function(data) { - temp_data_pie = []; + var temp_data_pie = []; for(i=0; i Date: Tue, 26 Jul 2016 11:06:31 +0200 Subject: [PATCH 07/49] Removed useless comments + prevent trying to add an already present element in the set --- bin/ModuleStats.py | 18 +++++++----------- .../trending_graphs/Moduletrending.html | 6 +++--- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 26ca5ce8..ce26c791 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -47,6 +47,9 @@ def compute_most_posted(server, message, num_day): curr_value = server.hget(date, module+'-'+keyword) keyword_total_sum += int(curr_value) if curr_value is not None else 0 + if keyword in server.smembers(redis_progression_name_set): # if it is already in the set + return + if (server.scard(redis_progression_name_set) < max_set_cardinality): server.sadd(redis_progression_name_set, keyword) @@ -91,25 +94,19 @@ def compute_provider_size(server, path, num_day_to_look): server.hset(paste_provider+'_num', paste_date, 1) # Compute Most Posted - #date_range = get_date_range(num_day_to_look) # check if this keyword is eligible for progression - provider_total_sum = 0 - #for date in date_range: - # curr_value = server.hget(paste_provider+'_size', date) - # provider_total_sum += int(curr_value) if curr_value is not None else 0 - #if paste_provider in server.smembers(redis_progression_name_set): # if it is already in the set - # return + if paste_provider in server.smembers(redis_progression_name_set): # if it is already in the set + return - if (server.scard(redis_progression_name_set) < max_set_cardinality): + elif (server.scard(redis_progression_name_set) < max_set_cardinality): server.sadd(redis_progression_name_set, paste_provider) - else: #not in the set + else: #set full capacity #Check value for all members member_set = [] for provider in server.smembers(redis_progression_name_set): curr_avg = 0.0 - # for date in date_range: curr_size = server.hget(provider+'_size', paste_date) curr_num = server.hget(provider+'_num', paste_date) if (curr_size is not None) and (curr_num is not None): @@ -120,7 +117,6 @@ def compute_provider_size(server, path, num_day_to_look): #remove min from set and add the new one print 'Adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' server.srem(redis_progression_name_set, member_set[0][0]) - server.srem(redis_progression_name_set, member_set[0][0]) server.sadd(redis_progression_name_set, paste_provider) diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index dfe78ba4..0a40df20 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -4,7 +4,7 @@
- Credential - Most posted domain + Credential - Today most posted domain
@@ -28,7 +28,7 @@
- Mail - Most posted domain (max 1 per paste) + Mail - Today most posted domain (max 1 per paste)
@@ -52,7 +52,7 @@
- Paste size - by paste provider + Provider - Today average paste size
From c07c12088dbf097dade6e7478f6bd19c6f6d902a Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 26 Jul 2016 12:07:52 +0200 Subject: [PATCH 08/49] In moduleStat, display old data if there is no data today and notify the user. --- var/www/Flask_server.py | 28 ++++++++++++------- var/www/templates/Moduletrending.html | 14 +++++++++- .../trending_graphs/Moduletrending.html | 10 +++---- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 00ceaf14..22388cfb 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -116,7 +116,23 @@ def get_date_range(num_day): date_list.append(date.substract_day(i)) return date_list - +def get_top_relevant_data(server, module_name): + redis_progression_name_set = 'top_'+ module_name +'_set' + days = 0 + for date in get_date_range(15): + member_set = [] + for keyw in server.smembers(redis_progression_name_set): + redis_progression_name = module_name+'-'+keyw + keyw_value = server.hget(date ,redis_progression_name) + keyw_value = keyw_value if keyw_value is not None else 0 + member_set.append((keyw, int(keyw_value))) + member_set.sort(key=lambda tup: tup[1], reverse=True) + if member_set[0][1] == 0: #No data for this date + days += 1 + continue + else: + member_set.insert(0, ("passed_days", days)) + return member_set @@ -182,15 +198,7 @@ def modulesCharts(): return jsonify(bar_values) else: - redis_progression_name_set = 'top_'+ module_name +'_set' - - member_set = [] - for keyw in r_serv_charts.smembers(redis_progression_name_set): - redis_progression_name = module_name+'-'+keyw - keyw_value = r_serv_charts.hget(get_date_range(0)[0] ,redis_progression_name) - keyw_value = keyw_value if keyw_value is not None else 0 - member_set.append((keyw, int(keyw_value))) - member_set.sort(key=lambda tup: tup[1], reverse=True) + member_set = get_top_relevant_data(r_serv_charts, module_name) if len(member_set) == 0: member_set.append(("No relevant data", int(100))) return jsonify(member_set) diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index bd4218e3..ee49c428 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -87,7 +87,19 @@ function(data) { var temp_data_pie = []; for(i=0; i 0 && data[0][1] < 7){ + $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); + $("#panel-"+module_name).removeClass("panel-default") + $("#panel-"+module_name).addClass("panel-warning") + } else if (data[0][1] > 6) { + $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); + $("#panel-"+module_name).removeClass("panel-default") + $("#panel-"+module_name).addClass("panel-danger") + } + } else { + temp_data_pie.push({label: data[i][0], data: data[i][1]}); + } } $.plot($("#flot-pie-chart-"+module_name), temp_data_pie, options); diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index 0a40df20..4c3477cc 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -2,9 +2,9 @@
-
+
- Credential - Today most posted domain + Credential - Today most posted domain
@@ -26,9 +26,9 @@
-
+
- Mail - Today most posted domain (max 1 per paste) + Mail - Today most posted domain (max 1 per paste)
@@ -52,7 +52,7 @@
- Provider - Today average paste size + Provider - Today average paste size
From 5bab1a6f171812aa52269599eac8d6e7c4b72201 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 26 Jul 2016 12:12:26 +0200 Subject: [PATCH 09/49] Added the click on a part in module stats --- var/www/templates/Moduletrending.html | 4 ++-- var/www/templates/trending_graphs/Moduletrending.html | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index ee49c428..ba36703f 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -91,11 +91,11 @@ if (data[0][1] > 0 && data[0][1] < 7){ $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); $("#panel-"+module_name).removeClass("panel-default") - $("#panel-"+module_name).addClass("panel-warning") + $("#panel-"+module_name).addClass("panel-info") } else if (data[0][1] > 6) { $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); $("#panel-"+module_name).removeClass("panel-default") - $("#panel-"+module_name).addClass("panel-danger") + $("#panel-"+module_name).addClass("panel-warning") } } else { temp_data_pie.push({label: data[i][0], data: data[i][1]}); diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index 4c3477cc..133fe137 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -9,7 +9,7 @@
-
+
Click on a part
@@ -33,7 +33,7 @@
-
+
Click on a part
@@ -57,7 +57,7 @@
-
+
Click on a part
From 63774cd1604c386291f1f0cb6520f1ee5ac96fc3 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 26 Jul 2016 16:56:52 +0200 Subject: [PATCH 10/49] In moduleStats, if there is data which have small percentage, display it in an 'other' part + Added stacked chart for them (still draft) --- var/www/templates/Moduletrending.html | 120 +++++++++++++++--- .../trending_graphs/Moduletrending.html | 15 ++- 2 files changed, 110 insertions(+), 25 deletions(-) diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index ba36703f..ef8da227 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -18,6 +18,7 @@ + @@ -72,16 +73,41 @@ var chart_2_num_day = 15; $SCRIPT_ROOT = {{ request.script_root|tojson|safe }}; + +function labelFormatter(label, series) { + return "
" ++ label + "
" + Math.round(series.percent) + "%
"; +} + function plot_top_graph(module_name){ /**** Flot Pie Chart ****/ var options = { - series: { pie: { show: true } }, + series: { pie: { show: true, + radius: 3/5, + combine: { + color: '#999', + threshold: 0.05 + }, + label: { + show: true, + radius: 1, + formatter: labelFormatter, + background: { + opacity: 0.5, + color: '#000' + } + } + } + }, grid: { hoverable: true, clickable: true }, - legend: { show: false } + legend: { show: false }, + }; var moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts"; + var tot_sum = 0; + var data_other = []; // Graph1 $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day, function(data) { @@ -90,19 +116,23 @@ if (i==0 && data[0][0] == "passed_days"){ //If there is no data today, take it from the past if (data[0][1] > 0 && data[0][1] < 7){ $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); - $("#panel-"+module_name).removeClass("panel-default") - $("#panel-"+module_name).addClass("panel-info") + $("#panel-"+module_name).removeClass("panel-green") + $("#panel-"+module_name).addClass("panel-yellow") } else if (data[0][1] > 6) { $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); - $("#panel-"+module_name).removeClass("panel-default") - $("#panel-"+module_name).addClass("panel-warning") + $("#panel-"+module_name).removeClass("panel-green") + $("#panel-"+module_name).addClass("panel-red") } } else { temp_data_pie.push({label: data[i][0], data: data[i][1]}); + tot_sum += data[i][1] } } + for(i=0; i diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index 133fe137..5ef90a23 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -2,9 +2,10 @@
-
+
- Credential - Today most posted domain + Credential - most posted domain + Today
@@ -26,9 +27,10 @@
-
+
- Mail - Today most posted domain (max 1 per paste) + Mail - most posted domain (max 1 per paste) + Today
@@ -50,9 +52,10 @@
-
+
- Provider - Today average paste size + Provider - average paste size + Today
From 240bdd9cca69126ac101c75eec7ef2e61a6fbab1 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Wed, 27 Jul 2016 09:27:32 +0200 Subject: [PATCH 11/49] Added number of paste by provider in module stats --- bin/ModuleStats.py | 20 ++++++------ var/www/Flask_server.py | 32 +++++++++++++++++++ var/www/templates/Moduletrending.html | 3 +- .../trending_graphs/Moduletrending.html | 8 ++++- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index ce26c791..cfafedd0 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -68,9 +68,10 @@ def compute_most_posted(server, message, num_day): server.sadd(redis_progression_name_set, keyword) -def compute_provider_size(server, path, num_day_to_look): +def compute_provider_info(server, path, num_day_to_look): - redis_progression_name_set = 'top_size_set' + redis_avg_size_name_set = 'top_size_set' + redis_providers_name_set = 'providers_set' paste = Paste.Paste(path) paste_size = paste._get_p_size() @@ -79,6 +80,7 @@ def compute_provider_size(server, path, num_day_to_look): new_avg = paste_size # Add/Update in Redis + server.sadd(redis_providers_name_set, paste_provider) prev_num_paste = server.hget(paste_provider+'_num', paste_date) if prev_num_paste is not None: server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1) @@ -96,16 +98,16 @@ def compute_provider_size(server, path, num_day_to_look): # Compute Most Posted # check if this keyword is eligible for progression - if paste_provider in server.smembers(redis_progression_name_set): # if it is already in the set + if paste_provider in server.smembers(redis_avg_size_name_set): # if it is already in the set return - elif (server.scard(redis_progression_name_set) < max_set_cardinality): - server.sadd(redis_progression_name_set, paste_provider) + elif (server.scard(redis_avg_size_name_set) < max_set_cardinality): + server.sadd(redis_avg_size_name_set, paste_provider) else: #set full capacity #Check value for all members member_set = [] - for provider in server.smembers(redis_progression_name_set): + for provider in server.smembers(redis_avg_size_name_set): curr_avg = 0.0 curr_size = server.hget(provider+'_size', paste_date) curr_num = server.hget(provider+'_num', paste_date) @@ -116,8 +118,8 @@ def compute_provider_size(server, path, num_day_to_look): if member_set[0][1] < new_avg: #remove min from set and add the new one print 'Adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' - server.srem(redis_progression_name_set, member_set[0][0]) - server.sadd(redis_progression_name_set, paste_provider) + server.srem(redis_avg_size_name_set, member_set[0][0]) + server.sadd(redis_avg_size_name_set, paste_provider) @@ -159,4 +161,4 @@ if __name__ == '__main__': if len(message.split(';')) > 1: compute_most_posted(r_serv_trend, message, num_day_to_look) else: - compute_provider_size(r_serv_trend, message, num_day_to_look) + compute_provider_info(r_serv_trend, message, num_day_to_look) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 22388cfb..446d051a 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -237,6 +237,38 @@ def sizeCharts(): return jsonify(member_set) +@app.route("/_numberChart", methods=['GET']) +def numberChart(): + #To be used later + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(keyword_name+'_num', date) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + return jsonify(bar_values) + + else: + redis_provider_name_set = 'providers_set' + + member_set = [] + for keyw in r_serv_charts.smembers(redis_provider_name_set): + redis_provider_name = keyw+'_num' + keyw_value = r_serv_charts.hget(redis_provider_name, get_date_range(0)[0]) + keyw_value = keyw_value if keyw_value is not None else 0 + member_set.append((keyw, int(keyw_value))) + member_set.sort(key=lambda tup: tup[1], reverse=True) + if len(member_set) == 0: + member_set.append(("No relevant data", int(100))) + return jsonify(member_set) + @app.route("/search", methods=['POST']) def search(): diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index ef8da227..6a1f38ee 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -65,6 +65,7 @@ plot_top_graph("credential"); plot_top_graph("mail"); plot_top_graph("size"); + plot_top_graph("number"); }); @@ -105,7 +106,7 @@ function labelFormatter(label, series) { }; - var moduleCharts = "size" == module_name ? "sizeCharts" : "moduleCharts"; + var moduleCharts = "size" == module_name ? "sizeCharts" : ("number" == module_name ? "numberChart" : "moduleCharts"); var tot_sum = 0; var data_other = []; // Graph1 diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index 5ef90a23..339676dd 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -54,13 +54,19 @@
- Provider - average paste size + Provider Today
+

Average paste size by provider

Click on a part
+ +

Number of paste by provider

+
+
Click on a part
+
From 27e0d5c578407ad58b2f6f48387efb2c1f146385 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Wed, 27 Jul 2016 11:26:56 +0200 Subject: [PATCH 12/49] Clean up and refactoring for front-end moduleStats --- var/www/static/js/moduleTrending.js | 174 ++++++++++++++++ var/www/templates/Moduletrending.html | 193 ++---------------- .../trending_graphs/Moduletrending.html | 6 +- 3 files changed, 198 insertions(+), 175 deletions(-) create mode 100644 var/www/static/js/moduleTrending.js diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js new file mode 100644 index 00000000..10487a5b --- /dev/null +++ b/var/www/static/js/moduleTrending.js @@ -0,0 +1,174 @@ +/* Already defined variable (Before the input) +* +* var chart_1_num_day = 5; +* var chart_2_num_day = 15; +* +*/ + +var pie_threshold = 0.05 +var options = { + series: { pie: { show: true, + radius: 3/5, + combine: { + color: '#999', + threshold: pie_threshold + }, + label: { + show: true, + radius: 1, + formatter: labelFormatter, + background: { + opacity: 0.5, + color: '#000' + } + } + } + }, + grid: { hoverable: true, clickable: true }, + legend: { show: false }, + }; + +function labelFormatter(label, series) { + return "
" + + label + "
" + Math.round(series.percent) + "%
"; +} + + +function plot_top_graph(module_name){ + + /**** Pie Chart ****/ + + // moduleCharts is used the decide the url to request data + var moduleCharts = "size" == module_name ? "sizeCharts" : ("number" == module_name ? "numberChart" : "moduleCharts"); + var tot_sum = 0; // used to detect elements putted in 'Other' pie's part + var data_other = []; // used to detect elements putted in 'Other' pie's part + + + $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day, + function(data) { + var temp_data_pie = []; + for(i=0; i 0 && data[0][1] < 7){ // If data is [1:6] day(s) old, put the panel in yellow + $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); + $("#panel-"+module_name).removeClass("panel-green") + $("#panel-"+module_name).addClass("panel-yellow") + } else if (data[0][1] > 6) { // data old of more than 7 days, put the panel in red + $("#day-"+module_name).text(data[0][1] + " Day(s) ago "); + $("#panel-"+module_name).removeClass("panel-green") + $("#panel-"+module_name).addClass("panel-red") + } + } else { + temp_data_pie.push({label: data[i][0], data: data[i][1]}); + tot_sum += data[i][1] + } + } + for(i=0; i + + + - -
diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index 339676dd..8eaba73b 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -4,7 +4,7 @@
- Credential - most posted domain + Credential - most posted domain Today
@@ -29,7 +29,7 @@
- Mail - most posted domain (max 1 per paste) + Mail - most posted domain (max 1 per paste) Today
@@ -54,7 +54,7 @@
- Provider + Provider Today
From 5a02a7ffce418ded2dd66b630ff1a658080723cc Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Wed, 27 Jul 2016 11:55:57 +0200 Subject: [PATCH 13/49] Clean up and refactoring for back-end moduleStats --- var/www/Flask_server.py | 57 +++++-------------- var/www/static/js/moduleTrending.js | 2 +- var/www/templates/Moduletrending.html | 2 +- .../trending_graphs/Moduletrending.html | 4 +- 4 files changed, 17 insertions(+), 48 deletions(-) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 446d051a..30fdb4e2 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -116,6 +116,8 @@ def get_date_range(num_day): date_list.append(date.substract_day(i)) return date_list +# Iterate over elements in the module provided and return the today data or the last data +# return format: [('passed_days', num_of_passed_days), ('elem_name1', elem_value1), ('elem_name2', elem_value2)]] def get_top_relevant_data(server, module_name): redis_progression_name_set = 'top_'+ module_name +'_set' days = 0 @@ -149,7 +151,6 @@ def stuff(): @app.route("/_progressionCharts", methods=['GET']) def progressionCharts(): - #To be used later attribute_name = request.args.get('attributeName') trending_name = request.args.get('trendingName') bar_requested = True if request.args.get('bar') == "true" else False @@ -169,6 +170,7 @@ def progressionCharts(): redis_progression_name = 'top_progression_'+trending_name redis_progression_name_set = 'top_progression_'+trending_name+'_set' + # Iterate over element in top_x_set and retreive their value member_set = [] for keyw in r_serv_charts.smembers(redis_progression_name_set): keyw_value = r_serv_charts.hget(redis_progression_name, keyw) @@ -181,7 +183,6 @@ def progressionCharts(): @app.route("/_moduleCharts", methods=['GET']) def modulesCharts(): - #To be used later keyword_name = request.args.get('keywordName') module_name = request.args.get('moduleName') bar_requested = True if request.args.get('bar') == "true" else False @@ -204,9 +205,8 @@ def modulesCharts(): return jsonify(member_set) -@app.route("/_sizeCharts", methods=['GET']) -def sizeCharts(): - #To be used later +@app.route("/_providersChart", methods=['GET']) +def providersChart(): keyword_name = request.args.get('keywordName') module_name = request.args.get('moduleName') bar_requested = True if request.args.get('bar') == "true" else False @@ -218,18 +218,19 @@ def sizeCharts(): date_range = get_date_range(num_day) # Retreive all data from the last num_day for date in date_range: - curr_value = r_serv_charts.hget(keyword_name+'_size', date) - bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], float(curr_value if curr_value is not None else 0)]) + curr_value = r_serv_charts.hget(keyword_name+'_'+module_name, date) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], float(curr_value if curr_value is not None else 0.0)]) return jsonify(bar_values) else: - redis_progression_name_set = 'top_size_set' + redis_provider_name_set = 'top_size_set' if module_name == "size" else 'providers_set' + # Iterate over element in top_x_set and retreive their value member_set = [] - for keyw in r_serv_charts.smembers(redis_progression_name_set): - redis_progression_name = keyw+'_size' - keyw_value = r_serv_charts.hget(redis_progression_name, get_date_range(0)[0]) - keyw_value = keyw_value if keyw_value is not None else 0 + for keyw in r_serv_charts.smembers(redis_provider_name_set): + redis_provider_name = keyw+'_'+module_name + keyw_value = r_serv_charts.hget(redis_provider_name, get_date_range(0)[0]) + keyw_value = keyw_value if keyw_value is not None else 0.0 member_set.append((keyw, float(keyw_value))) member_set.sort(key=lambda tup: tup[1], reverse=True) if len(member_set) == 0: @@ -237,38 +238,6 @@ def sizeCharts(): return jsonify(member_set) -@app.route("/_numberChart", methods=['GET']) -def numberChart(): - #To be used later - keyword_name = request.args.get('keywordName') - module_name = request.args.get('moduleName') - bar_requested = True if request.args.get('bar') == "true" else False - - if (bar_requested): - num_day = int(request.args.get('days')) - bar_values = [] - - date_range = get_date_range(num_day) - # Retreive all data from the last num_day - for date in date_range: - curr_value = r_serv_charts.hget(keyword_name+'_num', date) - bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) - return jsonify(bar_values) - - else: - redis_provider_name_set = 'providers_set' - - member_set = [] - for keyw in r_serv_charts.smembers(redis_provider_name_set): - redis_provider_name = keyw+'_num' - keyw_value = r_serv_charts.hget(redis_provider_name, get_date_range(0)[0]) - keyw_value = keyw_value if keyw_value is not None else 0 - member_set.append((keyw, int(keyw_value))) - member_set.sort(key=lambda tup: tup[1], reverse=True) - if len(member_set) == 0: - member_set.append(("No relevant data", int(100))) - return jsonify(member_set) - @app.route("/search", methods=['POST']) def search(): diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js index 10487a5b..e43887ec 100644 --- a/var/www/static/js/moduleTrending.js +++ b/var/www/static/js/moduleTrending.js @@ -39,7 +39,7 @@ function plot_top_graph(module_name){ /**** Pie Chart ****/ // moduleCharts is used the decide the url to request data - var moduleCharts = "size" == module_name ? "sizeCharts" : ("number" == module_name ? "numberChart" : "moduleCharts"); + var moduleCharts = "size" == module_name ? "providersChart" : ("num" == module_name ? "providersChart" : "moduleCharts"); var tot_sum = 0; // used to detect elements putted in 'Other' pie's part var data_other = []; // used to detect elements putted in 'Other' pie's part diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index 8500ae42..66e2f4e4 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -71,7 +71,7 @@ plot_top_graph("credential"); plot_top_graph("mail"); plot_top_graph("size"); - plot_top_graph("number"); + plot_top_graph("num"); setTimeout(refreshPlot, 10000); } diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index 8eaba73b..94dd8c27 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -64,8 +64,8 @@
Click on a part

Number of paste by provider

-
-
Click on a part
+
+
Click on a part
From acec508f53196ab26d1db087827de2ddeb26f36a Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 28 Jul 2016 09:52:07 +0200 Subject: [PATCH 14/49] fixed multiple binding bug in moduleStats + Added multiple bar display for 'other pie part' + Added ModuleStats tab in all other pages --- var/www/Flask_server.py | 2 + var/www/static/js/moduleTrending.js | 63 +++++++++---------- var/www/templates/Moduletrending.html | 16 ++--- var/www/templates/Trending.html | 2 +- var/www/templates/index.html | 1 + .../trending_graphs/Moduletrending.html | 16 ++--- 6 files changed, 51 insertions(+), 49 deletions(-) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 30fdb4e2..baae91cf 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -196,6 +196,7 @@ def modulesCharts(): for date in date_range: curr_value = r_serv_charts.hget(date, module_name+'-'+keyword_name) bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + bar_values.insert(0, keyword_name) return jsonify(bar_values) else: @@ -220,6 +221,7 @@ def providersChart(): for date in date_range: curr_value = r_serv_charts.hget(keyword_name+'_'+module_name, date) bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], float(curr_value if curr_value is not None else 0.0)]) + bar_values.insert(0, keyword_name) return jsonify(bar_values) else: diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js index e43887ec..9d61f518 100644 --- a/var/www/static/js/moduleTrending.js +++ b/var/www/static/js/moduleTrending.js @@ -34,17 +34,17 @@ function labelFormatter(label, series) { } -function plot_top_graph(module_name){ +function plot_top_graph(module_name, init){ /**** Pie Chart ****/ // moduleCharts is used the decide the url to request data var moduleCharts = "size" == module_name ? "providersChart" : ("num" == module_name ? "providersChart" : "moduleCharts"); - var tot_sum = 0; // used to detect elements putted in 'Other' pie's part - var data_other = []; // used to detect elements putted in 'Other' pie's part + var tot_sum = 0; // used to detect elements placed in 'Other' pie's part + var data_other = []; // used to detect elements placed in 'Other' pie's part - $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day, + var createPie = $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day, function(data) { var temp_data_pie = []; for(i=0; i
@@ -62,18 +62,18 @@ - diff --git a/var/www/templates/trending_graphs/Moduletrending.html b/var/www/templates/trending_graphs/Moduletrending.html index 9c970d66..ca5f2152 100644 --- a/var/www/templates/trending_graphs/Moduletrending.html +++ b/var/www/templates/trending_graphs/Moduletrending.html @@ -8,7 +8,10 @@ Today
-
+
+
+
+
Click on a part
@@ -33,7 +36,10 @@ Today
-
+
+
+
+
Click on a part
@@ -59,13 +65,23 @@
-

Average paste size by provider

-
-
Click on a part
+
+

Average paste size by provider

+
+
+
+
+
Click on a part
+
-

Number of paste by provider

-
-
Click on a part
+
+

Number of paste by provider

+
+
+
+
+
Click on a part
+
From c2bf2ba630439cf22de9d8e673f46f59b7e7ec01 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 28 Jul 2016 16:35:30 +0200 Subject: [PATCH 16/49] Added tooltip in moduleStat above each bar chart + Added highlight of the selected bar. (Still not working for category 'Other') --- var/www/static/js/moduleTrending.js | 106 ++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 20 deletions(-) diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js index 97b4af38..d5c8b378 100644 --- a/var/www/static/js/moduleTrending.js +++ b/var/www/static/js/moduleTrending.js @@ -28,6 +28,9 @@ var options = { legend: { show: false }, }; +var plot_data_old = [] +var plot_old = [] + function labelFormatter(label, series) { return "
" + label + "
" + Math.round(series.percent) + "%
"; @@ -43,7 +46,6 @@ function plot_top_graph(module_name, init){ var tot_sum = 0; // used to detect elements placed in 'Other' pie's part var data_other = []; // used to detect elements placed in 'Other' pie's part - var createPie = $.getJSON($SCRIPT_ROOT+"/_"+moduleCharts+"?moduleName="+module_name+"&num_day="+chart_1_num_day, function(data) { var temp_data_pie = []; @@ -75,22 +77,17 @@ function plot_top_graph(module_name, init){ if (item == null) return; var clicked_label = item.series.label; - update_bar_chart(moduleCharts, "#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d"); - - - $("#flot-bar-chart-"+module_name).bind("plothover", function (event, pos, item) { - if (item) { - var x = item.datapoint[0].toFixed(2); - var y = item.datapoint[1].toFixed(2); - var date = new Date(parseInt(x)); - date = date.getMonth()+'/'+date.getDate(); - $("#tooltip_graph-"+module_name).html(item.series.label + " of " + date + " = " + y+"") - .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) - .fadeIn(200); - } else { - } - }); + if (module_name == "size"){ + update_bar_chart(moduleCharts, module_name, "#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d", false); + update_bar_chart(moduleCharts, "num", "#flot-bar-chart-"+"num", clicked_label, item.series.color, chart_1_num_day, "%m/%d", true); + } + else if (module_name == "num"){ + update_bar_chart(moduleCharts, module_name, "#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d", false); + update_bar_chart(moduleCharts, "size", "#flot-bar-chart-"+"size", clicked_label, item.series.color, chart_1_num_day, "%m/%d", true); + } else { + update_bar_chart(moduleCharts, module_name, "#flot-bar-chart-"+module_name, clicked_label, item.series.color, chart_1_num_day, "%m/%d", true); + } }); } }); @@ -98,7 +95,7 @@ function plot_top_graph(module_name, init){ /**** Bar Chart ****/ - function update_bar_chart(chartUrl, chartID, involved_item, serie_color, num_day, timeformat){ + function update_bar_chart(chartUrl, module_name, chartID, involved_item, serie_color, num_day, timeformat, can_bind){ var barOptions = { series: { bars: { show: true, barWidth: 82800000 }, @@ -167,8 +164,6 @@ function plot_top_graph(module_name, init){ tooltipOpts: { content: "x: %x, y: %y" } }) - - }); } else { // Normal pie's part clicked @@ -185,8 +180,79 @@ function plot_top_graph(module_name, init){ data: temp_data_bar, color: serie_color }; - $.plot($(chartID), [barData], barOptions); + var plot = $.plot($(chartID), [barData], barOptions); + if (plot_data_old.length<2){ + plot_data_old.push(plot.getData()); + plot_old.push(plot); + } else { + plot_data_old = []; + plot_old = []; + plot_data_old.push(plot.getData()); + plot_old.push(plot); + } + if (can_bind){ + binder(module_name); + if (module_name == "size") + binder("num"); + else if (module_name == "num") + binder("size"); + } }); } }; } + +function binder(module_name){ +console.log(module_name); +console.log(plot_data_old); + $("#flot-bar-chart-"+module_name).bind("plothover", function (event, pos, item) { + if (item) { + var x = item.datapoint[0] + var y = item.datapoint[1] + var date = new Date(parseInt(x)); + var formated_date = date.getMonth()+'/'+date.getDate(); + + $("#tooltip_graph-"+module_name).html(item.series.label + " of " + formated_date + " = " + y+"") + .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) + .fadeIn(200); + + var plot_other = plot_data_old[0]; + if (plot_other.length > 0){ + var data_other = plot_other[0].data; + for(i=0; i" + curr_data_other+"") + .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) + .fadeIn(200); + for(i=0; i" + curr_data_other+"") + .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) + .fadeIn(200); + for(i=0; i Date: Fri, 29 Jul 2016 10:36:44 +0200 Subject: [PATCH 17/49] Fixed small race condition bug in ModuleStat.py and Added highlight of bar belonging to the same serie in moduleStats.js --- bin/ModuleStats.py | 11 ++-- var/www/static/js/moduleTrending.js | 96 +++++++++++++++++------------ 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index cfafedd0..8fed1949 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -61,11 +61,12 @@ def compute_most_posted(server, message, num_day): if keyw_value is not None: member_set.append((keyw, int(keyw_value))) member_set.sort(key=lambda tup: tup[1]) - if member_set[0][1] < keyword_total_sum: - #remove min from set and add the new one - print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' - server.srem(redis_progression_name_set, member_set[0][0]) - server.sadd(redis_progression_name_set, keyword) + if len(member_set) > 0: + if member_set[0][1] < keyword_total_sum: + #remove min from set and add the new one + print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + server.srem(redis_progression_name_set, member_set[0][0]) + server.sadd(redis_progression_name_set, keyword) def compute_provider_info(server, path, num_day_to_look): diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js index d5c8b378..afb764c4 100644 --- a/var/www/static/js/moduleTrending.js +++ b/var/www/static/js/moduleTrending.js @@ -67,7 +67,7 @@ function plot_top_graph(module_name, init){ } for(i=0; i" + y+"") .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) .fadeIn(200); - - var plot_other = plot_data_old[0]; - if (plot_other.length > 0){ - var data_other = plot_other[0].data; + var plot_obj = plot_data_old[0]; //contain series + for(serie=0; serie" + curr_data_other+"") - .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) - .fadeIn(200); - for(i=0; i" + curr_data_other+"") - .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) - .fadeIn(200); - for(i=0; i" + curr_data_other+"") + .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) + .fadeIn(200); + for(i=0; i" + curr_data_other+"") + .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) + .fadeIn(200); + for(i=0; i Date: Fri, 29 Jul 2016 14:30:41 +0200 Subject: [PATCH 18/49] Big refactoring and added lot of comments + fixed bug attaching multiple binder on barChart --- var/www/static/js/moduleTrending.js | 232 +++++++++++------- .../trending_graphs/Moduletrending.html | 8 +- 2 files changed, 141 insertions(+), 99 deletions(-) diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js index afb764c4..076a2e0b 100644 --- a/var/www/static/js/moduleTrending.js +++ b/var/www/static/js/moduleTrending.js @@ -5,31 +5,36 @@ * */ -var pie_threshold = 0.05 -var options = { - series: { pie: { show: true, - radius: 3/5, - combine: { - color: '#999', - threshold: pie_threshold - }, - label: { - show: true, - radius: 1, - formatter: labelFormatter, - background: { - opacity: 0.5, - color: '#000' + +/* VARIABLES */ + var pie_threshold = 0.05 + var options = { + series: { pie: { show: true, + radius: 3/5, + combine: { + color: '#999', + threshold: pie_threshold + }, + label: { + show: true, + radius: 1, + formatter: labelFormatter, + background: { + opacity: 0.5, + color: '#000' + } } } - } - }, - grid: { hoverable: true, clickable: true }, - legend: { show: false }, - }; + }, + grid: { hoverable: true, clickable: true }, + legend: { show: false }, + }; + + /* Linked graph - remember the data */ + var plot_data_old = [] + var plot_old = [] -var plot_data_old = [] -var plot_old = [] +/* FUNCTIONS */ function labelFormatter(label, series) { return "
" @@ -37,6 +42,7 @@ function labelFormatter(label, series) { } +/* Plot, and bind chart listener */ function plot_top_graph(module_name, init){ /**** Pie Chart ****/ @@ -67,26 +73,31 @@ function plot_top_graph(module_name, init){ } for(i=0; i1){ // avoid adding plot_data for previous clicked pie part plot_data_old = []; plot_old = []; - plot_data_old.push(plot.getData()); - plot_old.push(plot); } - if (can_bind){ + plot_data_old.push(plot.getData()); + plot_old.push(plot); + + if (can_bind){ // avoid binding two listener for provider graphs binder(module_name); - if (module_name == "size") + if (module_name == "size") // bind the linked provider graph binder("num"); else if (module_name == "num") binder("size"); - } + } }); @@ -195,80 +217,100 @@ function plot_top_graph(module_name, init){ data: temp_data_bar, color: serie_color }; - var plot = $.plot($(chartID), [barData], barOptions); - if (plot_data_old.length<2){ - plot_data_old.push(plot.getData()); - plot_old.push(plot); - } else { + plot = $.plot($(chartID), [barData], barOptions); + + /* rememeber the data for the two graph */ + if (plot_data_old.length>1){ // avoid adding plot_data for previous clicked pie part plot_data_old = []; plot_old = []; - plot_data_old.push(plot.getData()); - plot_old.push(plot); } - if (can_bind){ - binder(module_name); - if (module_name == "size") - binder("num"); - else if (module_name == "num") - binder("size"); - } - }); - } - }; -} + plot_data_old.push(plot.getData()); + plot_old.push(plot); + if (can_bind){ // avoid binding two listener for provider graphs + $("#flot-bar-chart-"+module_name).unbind( "plothover.customHandler" ); + binder(module_name); + if (module_name == "size"){ // bind the linked provider graph + $("#flot-bar-chart-"+"num").unbind( "plothover.customHandler" ); + binder("num"); + } + else if (module_name == "num"){ + $("#flot-bar-chart-"+"size").unbind( "plothover.customHandler" ); + binder("size"); + } + } + }); + } + + }; // end update_bar_chart + +} // end plot_top_graph + + +/* Bind a listener to the graph to display the value under the cursor in the approriate div */ function binder(module_name){ - $("#flot-bar-chart-"+module_name).bind("plothover", function (event, pos, item) { - if (item) { + $("#flot-bar-chart-"+module_name).bind("plothover.customHandler", function (event, pos, item) { + if (item) { // a correct item is hovered var x = item.datapoint[0] var y = item.datapoint[1] var date = new Date(parseInt(x)); var formated_date = date.getMonth()+'/'+date.getDate(); + var color = item.series.color; + var color_opac = "rgba" + color.slice(3, color.length-1)+",0.15)"; + // display the hovered value in the chart div $("#tooltip_graph-"+module_name).html(item.series.label + " of " + formated_date + " = " + y+"") - .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) + .css({padding: "2px", width: 'auto', 'background': color_opac , 'border': "3px solid "+color}) .fadeIn(200); - var plot_obj = plot_data_old[0]; //contain series - for(serie=0; serie" + curr_data_other+"") - .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) - .fadeIn(200); - for(i=0; i" + curr_data_other+"") - .css({padding: "2px", width: 'auto', 'background-color': 'white', 'border': "3px solid "+item.series.color}) + + $("#tooltip_graph-"+new_module_name).html(item.series.label + " of " + formated_date + " = " + curr_data_other+"") + .css({padding: "2px", width: 'auto', 'background': color_opac, 'border': "3px solid "+color}) .fadeIn(200); + + // clean up other highlighted bar + for(i=0; i
-
+
No bar hovered
@@ -37,7 +37,7 @@
-
+
No bar hovered
@@ -67,7 +67,7 @@

Average paste size by provider

-
+
No bar hovered
@@ -76,7 +76,7 @@

Number of paste by provider

-
+
No bar hovered
From feb1717dc66146ae95a393a86d2bf4de783c145e Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 29 Jul 2016 16:52:50 +0200 Subject: [PATCH 19/49] fixed size bug, which in some cases display the total size instead of the average + few cleanup + commented out log scale in moduleStats.js and added large number division in case of div by 0 --- bin/ModuleStats.py | 92 +++++++++++++++++------------ var/www/Flask_server.py | 26 ++++++-- var/www/static/js/moduleTrending.js | 6 +- 3 files changed, 77 insertions(+), 47 deletions(-) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 8fed1949..b00ba9db 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -16,7 +16,6 @@ from packages import Paste # Config Var max_set_cardinality = 7 -num_day_to_look = 5 def get_date_range(num_day): curr_date = datetime.date.today() @@ -28,24 +27,24 @@ def get_date_range(num_day): return date_list -def compute_most_posted(server, message, num_day): +def compute_most_posted(server, message): module, num, keyword, paste_date = message.split(';') redis_progression_name_set = 'top_'+ module +'_set' # Add/Update in Redis prev_score = server.hget(paste_date, module+'-'+keyword) if prev_score is not None: - server.hset(paste_date, module+'-'+keyword, int(prev_score) + int(num)) + ok = server.hset(paste_date, module+'-'+keyword, int(prev_score) + int(num)) else: - server.hset(paste_date, module+'-'+keyword, int(num)) + ok = server.hset(paste_date, module+'-'+keyword, int(num)) # Compute Most Posted - date_range = get_date_range(num_day) + date = get_date_range(0) # check if this keyword is eligible for progression keyword_total_sum = 0 - for date in date_range: - curr_value = server.hget(date, module+'-'+keyword) - keyword_total_sum += int(curr_value) if curr_value is not None else 0 + + curr_value = server.hget(date, module+'-'+keyword) + keyword_total_sum += int(curr_value) if curr_value is not None else 0 if keyword in server.smembers(redis_progression_name_set): # if it is already in the set return @@ -69,7 +68,7 @@ def compute_most_posted(server, message, num_day): server.sadd(redis_progression_name_set, keyword) -def compute_provider_info(server, path, num_day_to_look): +def compute_provider_info(server, path): redis_avg_size_name_set = 'top_size_set' redis_providers_name_set = 'providers_set' @@ -81,48 +80,65 @@ def compute_provider_info(server, path, num_day_to_look): new_avg = paste_size # Add/Update in Redis - server.sadd(redis_providers_name_set, paste_provider) prev_num_paste = server.hget(paste_provider+'_num', paste_date) if prev_num_paste is not None: - server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1) + ok = server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1) prev_sum_size = server.hget(paste_provider+'_size', paste_date) if prev_sum_size is not None: - server.hset(paste_provider+'_size', paste_date, paste_size) + ok = server.hset(paste_provider+'_size', paste_date, float(prev_sum_size)+paste_size) new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1) else: - server.hset(paste_provider+'_size', paste_date, paste_size) + ok = server.hset(paste_provider+'_size', paste_date, paste_size) else: - server.hset(paste_provider+'_num', paste_date, 1) + ok = server.hset(paste_provider+'_num', paste_date, 1) + # # Compute Most Posted - # check if this keyword is eligible for progression + # - if paste_provider in server.smembers(redis_avg_size_name_set): # if it is already in the set - return - - elif (server.scard(redis_avg_size_name_set) < max_set_cardinality): - server.sadd(redis_avg_size_name_set, paste_provider) - - else: #set full capacity - #Check value for all members - member_set = [] - for provider in server.smembers(redis_avg_size_name_set): - curr_avg = 0.0 - curr_size = server.hget(provider+'_size', paste_date) - curr_num = server.hget(provider+'_num', paste_date) - if (curr_size is not None) and (curr_num is not None): - curr_avg += float(curr_size) / float(curr_num) - member_set.append((provider, curr_avg)) - member_set.sort(key=lambda tup: tup[1]) - if member_set[0][1] < new_avg: - #remove min from set and add the new one - print 'Adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' - server.srem(redis_avg_size_name_set, member_set[0][0]) + # Size + if paste_provider not in server.smembers(redis_avg_size_name_set): # if it is already in the set + if (server.scard(redis_avg_size_name_set) < max_set_cardinality): server.sadd(redis_avg_size_name_set, paste_provider) + else: #set full capacity + #Check value for all members + member_set = [] + for provider in server.smembers(redis_avg_size_name_set): + curr_avg = 0.0 + curr_size = server.hget(provider+'_size', paste_date) + curr_num = server.hget(provider+'_num', paste_date) + if (curr_size is not None) and (curr_num is not None): + curr_avg = float(curr_size) / float(curr_num) + member_set.append((provider, curr_avg)) + member_set.sort(key=lambda tup: tup[1]) + if member_set[0][1] < new_avg: + #remove min from set and add the new one + print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + server.srem(redis_avg_size_name_set, member_set[0][0]) + server.sadd(redis_avg_size_name_set, paste_provider) + # Num + if paste_provider not in server.smembers(redis_providers_name_set): # if it is already in the set + if (server.scard(redis_providers_name_set) < max_set_cardinality): + server.sadd(redis_providers_name_set, paste_provider) + + else: #set full capacity + #Check value for all members + member_set = [] + for provider in server.smembers(redis_providers_name_set): + curr_num = server.hget(provider+'_num', paste_date) + member_set.append((provider, int(curr_num))) + member_set.sort(key=lambda tup: tup[1]) + print '------------------------------' + print member_set + if member_set[0][1] < int(prev_num_paste)+1: + #remove min from set and add the new one + print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + server.srem(redis_providers_name_set, member_set[0][0]) + server.sadd(redis_providers_name_set, paste_provider) if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) @@ -160,6 +176,6 @@ if __name__ == '__main__': else: # Do something with the message from the queue if len(message.split(';')) > 1: - compute_most_posted(r_serv_trend, message, num_day_to_look) + compute_most_posted(r_serv_trend, message) else: - compute_provider_info(r_serv_trend, message, num_day_to_look) + compute_provider_info(r_serv_trend, message) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index baae91cf..d2772893 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -219,8 +219,16 @@ def providersChart(): date_range = get_date_range(num_day) # Retreive all data from the last num_day for date in date_range: - curr_value = r_serv_charts.hget(keyword_name+'_'+module_name, date) - bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], float(curr_value if curr_value is not None else 0.0)]) + curr_value_size = r_serv_charts.hget(keyword_name+'_'+'size', date) + curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date) + if module_name == "size": + curr_value_num = curr_value_num if curr_value_num is not None else 0 + curr_value_num = curr_value_num if int(curr_value_num) != 0 else 10000000000 + curr_value = float(curr_value_size if curr_value_size is not None else 0.0) / float(curr_value_num) + else: + curr_value = float(curr_value_num if curr_value_num is not None else 0.0) + + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], curr_value]) bar_values.insert(0, keyword_name) return jsonify(bar_values) @@ -230,10 +238,16 @@ def providersChart(): # Iterate over element in top_x_set and retreive their value member_set = [] for keyw in r_serv_charts.smembers(redis_provider_name_set): - redis_provider_name = keyw+'_'+module_name - keyw_value = r_serv_charts.hget(redis_provider_name, get_date_range(0)[0]) - keyw_value = keyw_value if keyw_value is not None else 0.0 - member_set.append((keyw, float(keyw_value))) + redis_provider_name_size = keyw+'_'+'size' + redis_provider_name_num = keyw+'_'+'num' + keyw_value_size = r_serv_charts.hget(redis_provider_name_size, get_date_range(0)[0]) + keyw_value_size = keyw_value_size if keyw_value_size is not None else 0.0 + keyw_value_num = r_serv_charts.hget(redis_provider_name_num, get_date_range(0)[0]) + keyw_value_num = keyw_value_num if keyw_value_num is not None else 0.0 + if module_name == "size": + member_set.append((keyw, float(keyw_value_size)/float(keyw_value_num))) + else: + member_set.append((keyw, float(keyw_value_num))) member_set.sort(key=lambda tup: tup[1], reverse=True) if len(member_set) == 0: member_set.append(("No relevant data", float(100))) diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js index 076a2e0b..68ea7e21 100644 --- a/var/www/static/js/moduleTrending.js +++ b/var/www/static/js/moduleTrending.js @@ -143,7 +143,7 @@ function plot_top_graph(module_name, init){ for(i=1; i Date: Fri, 29 Jul 2016 16:54:25 +0200 Subject: [PATCH 20/49] removed prints --- bin/ModuleStats.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index b00ba9db..fee29d91 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -132,8 +132,6 @@ def compute_provider_info(server, path): curr_num = server.hget(provider+'_num', paste_date) member_set.append((provider, int(curr_num))) member_set.sort(key=lambda tup: tup[1]) - print '------------------------------' - print member_set if member_set[0][1] < int(prev_num_paste)+1: #remove min from set and add the new one print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' From a1b2329a9be2770f6e345dd09db7680864b11364 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 1 Aug 2016 10:32:24 +0200 Subject: [PATCH 21/49] Fixed bug in case of null elements arrising because of a new month --- bin/ModuleStats.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index fee29d91..36812dcd 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -93,6 +93,7 @@ def compute_provider_info(server, path): else: ok = server.hset(paste_provider+'_num', paste_date, 1) + prev_num_paste = 0 # # Compute Most Posted @@ -129,14 +130,17 @@ def compute_provider_info(server, path): #Check value for all members member_set = [] for provider in server.smembers(redis_providers_name_set): + curr_num = 0 curr_num = server.hget(provider+'_num', paste_date) - member_set.append((provider, int(curr_num))) + if curr_num is not None: + member_set.append((provider, int(curr_num))) member_set.sort(key=lambda tup: tup[1]) - if member_set[0][1] < int(prev_num_paste)+1: - #remove min from set and add the new one - print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' - server.srem(redis_providers_name_set, member_set[0][0]) - server.sadd(redis_providers_name_set, paste_provider) + if len(member_set) > 0: + if member_set[0][1] < int(prev_num_paste)+1: + #remove min from set and add the new one + print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + server.srem(redis_providers_name_set, member_set[0][0]) + server.sadd(redis_providers_name_set, paste_provider) if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) From f3cf2f853a89c288cbd53a7bbbc4c83ea78aea9e Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 1 Aug 2016 11:08:53 +0200 Subject: [PATCH 22/49] Fixed a bug in the hover of providers graph. When we click on a part in providers graphs, we hover an item, then we click on a random part not belonging to providers graphs, and we re-hover one part of the providers graphs, there was an undefined variable (The random click overrided it). --- var/www/static/js/moduleTrending.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/var/www/static/js/moduleTrending.js b/var/www/static/js/moduleTrending.js index 68ea7e21..ff32cc1b 100644 --- a/var/www/static/js/moduleTrending.js +++ b/var/www/static/js/moduleTrending.js @@ -220,12 +220,14 @@ function plot_top_graph(module_name, init){ plot = $.plot($(chartID), [barData], barOptions); /* rememeber the data for the two graph */ - if (plot_data_old.length>1){ // avoid adding plot_data for previous clicked pie part - plot_data_old = []; - plot_old = []; + if ((module_name == "size") || (module_name == "num")) { // Add only for the provider graph + if (plot_data_old.length>1){ // avoid adding plot_data for previous clicked pie part + plot_data_old = []; + plot_old = []; + } + plot_data_old.push(plot.getData()); + plot_old.push(plot); } - plot_data_old.push(plot.getData()); - plot_old.push(plot); if (can_bind){ // avoid binding two listener for provider graphs $("#flot-bar-chart-"+module_name).unbind( "plothover.customHandler" ); From 3169dc27b06434f9a30eca4a8b9a5e5c51e4eccb Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 2 Aug 2016 15:43:11 +0200 Subject: [PATCH 23/49] Added SQLInjectionDetection module --- bin/LAUNCH.sh | 2 + bin/SQLInjectionDetection.py | 155 +++++++++++++++++++++++++++++++++++ bin/Url.py | 2 +- bin/WebStats.py | 2 +- bin/packages/modules.cfg | 3 + 5 files changed, 162 insertions(+), 2 deletions(-) create mode 100755 bin/SQLInjectionDetection.py diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 5afc3c2a..b74dc455 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -142,6 +142,8 @@ function launching_scripts { screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' sleep 0.1 screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' } #If no params, display the help diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py new file mode 100755 index 00000000..5be7d98c --- /dev/null +++ b/bin/SQLInjectionDetection.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* +""" + Template for new modules +""" + +import time +import sys +import string +import datetime +import redis +import os +import urllib2 +import re +from pubsublogger import publisher +from Helper import Process +from packages import Paste +from pyfaup.faup import Faup + +# Config Var + +regex_injection = [] +word_injection = [] + +# Classic atome injection +regex_injection1 = "([[AND |OR ]+[\'|\"]?[0-9a-zA-Z]+[\'|\"]?=[\'|\"]?[0-9a-zA-Z]+[\'|\"]?])" +regex_injection.append(regex_injection1) + +# Time-based attack +regex_injection2 = ["SLEEP\([0-9]+", "BENCHMARK\([0-9]+", "WAIT FOR DELAY ", "WAITFOR DELAY"] +regex_injection2 = re.compile('|'.join(regex_injection2)) +regex_injection.append(regex_injection2) + +# Interesting keyword +word_injection1 = [" IF ", " ELSE ", " CASE ", " WHEN ", " END ", " UNION ", "SELECT ", " FROM ", " ORDER BY ", " WHERE ", " DELETE ", " DROP ", " UPDATE ", " EXEC "] +word_injection.append(word_injection1) + +# Comment +word_injection2 = ["--", "#", "/*"] +word_injection.append(word_injection2) + +# Database special keywords +word_injection3 = ["@@version", "POW(", "BITAND(", "SQUARE("] +word_injection.append(word_injection3) + +# Html keywords +word_injection4 = [" + + + + + + + + + +
+ + + + + + +
+
+
+

Browse important pastes

+
+ +
+ +
+ + + +
+ + +
+
+ + + + + + + + + + + + {% set i = 0 %} + {% for path in all_path %} + + + + + + + + {% set i = i + 1 %} + {% endfor %} + +
#PathDate# of linesAction
{{ i + 1 }}{{ path }}{{ paste_date[i] }}{{ paste_linenum[i] }}

+
+
+ coucou2 +
+
+ coucou3 +
+
+ coucou4 +
+
+ +
+ + + + + + + + + +
+ + + diff --git a/var/www/templates/index.html b/var/www/templates/index.html index d12adfa4..b6426ea7 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -39,6 +39,7 @@
  • Dashboard
  • Trending charts
  • Modules statistics
  • +
  • Browse important pastes
  • diff --git a/var/www/templates/show_saved_paste.html b/var/www/templates/show_saved_paste.html index 0da148f3..75d8ab46 100644 --- a/var/www/templates/show_saved_paste.html +++ b/var/www/templates/show_saved_paste.html @@ -62,7 +62,7 @@ {% endif %}

    Content:

    -

    {{ content }}

    +

    {{ content }}

    From 78c611fead78cccc98f662150299e2eb5b535ee6 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 8 Aug 2016 09:17:44 +0200 Subject: [PATCH 31/49] Added warning_paste module and created related webpages. Fixed a Faup bug in credential (multiple instanciation) and added correc populate_set_out in concerned modules (creditcard, credential, ...). Linked browse_warning_paste module and Flask function with redis (created new sets). --- bin/Browse_warning_paste.py | 57 ++++++ bin/Credential.py | 5 +- bin/CreditCard.py | 2 + bin/Cve.py | 7 +- bin/Duplicate_ssdeep_v2.py | 2 +- bin/Keys.py | 5 +- bin/LAUNCH.sh | 2 + bin/Mail.py | 1 + bin/Phone.py | 4 +- bin/SQLInjectionDetection.py | 4 + bin/Url.py | 7 +- bin/packages/modules.cfg | 19 +- var/www/Flask_server.py | 22 ++- var/www/templates/Moduletrending.html | 1 + var/www/templates/Trending.html | 1 + var/www/templates/browse_important_paste.html | 164 ++++++------------ .../templates/important_paste_by_module.html | 112 ++++++++++++ 17 files changed, 280 insertions(+), 135 deletions(-) create mode 100755 bin/Browse_warning_paste.py create mode 100644 var/www/templates/important_paste_by_module.html diff --git a/bin/Browse_warning_paste.py b/bin/Browse_warning_paste.py new file mode 100755 index 00000000..49444979 --- /dev/null +++ b/bin/Browse_warning_paste.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +""" +The Browse_warning_paste module +==================== + +This module saved signaled paste (logged as 'warning') in redis for further usage +like browsing by category + +Its input comes from other modules, namely: + Credential, CreditCard, SQLinjection, CVE, Keys, Mail and Phone + +""" + +import redis +import time +from datetime import datetime, timedelta +from packages import Paste +from pubsublogger import publisher +from Helper import Process + +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + + config_section = 'Browse_warning_paste' + + p = Process(config_section) + + server = redis.StrictRedis( + host=p.config.get("Redis_Level_DB", "host"), + port=p.config.get("Redis_Level_DB", "port"), + db=p.config.get("Redis_Level_DB", "db")) + + # FUNCTIONS # + publisher.info("Script duplicate started") + + while True: + message = p.get_from_set() + if message is not None: + module_name, p_path = message.split(';') + #PST = Paste.Paste(p_path) + else: + publisher.debug("Script Attribute is idling 10s") + time.sleep(10) + continue + + # Add in redis + # Format in set: WARNING_moduleName -> p_path + key = "WARNING_" + module_name + print key + ' -> ' + p_path + server.sadd(key, p_path) + + publisher.info('Saved in warning paste {}'.format(p_path)) + #print 'Saved in warning paste {}'.format(p_path) + diff --git a/bin/Credential.py b/bin/Credential.py index 23c90506..cfbec7c7 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -15,6 +15,8 @@ if __name__ == "__main__": p = Process(config_section) publisher.info("Find credentials") + faup = Faup() + critical = 8 regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" @@ -55,10 +57,11 @@ if __name__ == "__main__": publisher.warning(to_print) #Send to duplicate p.populate_set_out(filepath, 'Duplicate') + #send to Browse_warning_paste + p.populate_set_out('credential;{}'.format(filepath), 'BrowseWarningPaste') #Put in form, count occurences, then send to moduleStats creds_sites = {} - faup = Faup() for url in sites: faup.decode(url) domain = faup.get()['domain'] diff --git a/bin/CreditCard.py b/bin/CreditCard.py index de90f4d4..e5f8020e 100755 --- a/bin/CreditCard.py +++ b/bin/CreditCard.py @@ -67,6 +67,8 @@ if __name__ == "__main__": to_print, len(creditcard_set))) #Send to duplicate p.populate_set_out(filepath, 'Redis_Duplicate') + #send to Browse_warning_paste + p.populate_set_out('creditCard;{}'.format(filename), 'BrowseWarningPaste') else: publisher.info('{}CreditCard related'.format(to_print)) else: diff --git a/bin/Cve.py b/bin/Cve.py index 7323ee5a..1e152463 100755 --- a/bin/Cve.py +++ b/bin/Cve.py @@ -53,5 +53,8 @@ if __name__ == '__main__': # Do something with the message from the queue search_cve(message) - # (Optional) Send that thing to the next queue - #p.populate_set_out(something_has_been_done) + #send to Browse_warning_paste + filepath, count = message.split() + p.populate_set_out('cve;{}'.format(filepath), 'BrowseWarningPaste') + #Send to duplicate + p.populate_set_out(filepath, 'Duplicate') diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicate_ssdeep_v2.py index 22498b90..a2ab55aa 100755 --- a/bin/Duplicate_ssdeep_v2.py +++ b/bin/Duplicate_ssdeep_v2.py @@ -7,7 +7,7 @@ The Duplicate module This huge module is, in short term, checking duplicates. Its input comes from other modules, namely: - Credential, CreditCard, Keys, Mails and Phone + Credential, CreditCard, Keys, Mails, SQLinjectionDetection, CVE and Phone This one differ from v1 by only using redis and not json file stored on disk diff --git a/bin/Keys.py b/bin/Keys.py index 9c44f60a..a286dada 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -17,7 +17,9 @@ def search_gpg(message): if '-----BEGIN PGP MESSAGE-----' in content: publisher.warning('{} has a PGP enc message'.format(paste.p_name)) #Send to duplicate - p.populate_set_out(message) + p.populate_set_out(message, 'Duplicate') + #send to Browse_warning_paste + p.populate_set_out('keys;{}'.format(message), 'BrowseWarningPaste') if __name__ == '__main__': @@ -49,4 +51,3 @@ if __name__ == '__main__': search_gpg(message) # (Optional) Send that thing to the next queue - #p.populate_set_out(something_has_been_done) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index b74dc455..86877c2a 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -144,6 +144,8 @@ function launching_scripts { screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' sleep 0.1 screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' } #If no params, display the help diff --git a/bin/Mail.py b/bin/Mail.py index d3968442..2b3ed5fc 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -69,6 +69,7 @@ if __name__ == "__main__": for mail in MX_values[1]: print 'mail;{};{};{}'.format(1, mail, PST.p_date) p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') + p.populate_set_out('mail;{}'.format(filename), 'BrowseWarningPaste') prec_filename = filename diff --git a/bin/Phone.py b/bin/Phone.py index b53b079c..b25dae41 100755 --- a/bin/Phone.py +++ b/bin/Phone.py @@ -23,8 +23,10 @@ def search_phone(message): if len(results) > 4: print results publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) + #send to Browse_warning_paste + p.populate_set_out('phone;{}'.format(message), 'BrowseWarningPaste') #Send to duplicate - p.populate_set_out(message) + p.populate_set_out(message, 'Duplicate') if __name__ == '__main__': # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index 9dae63bd..d6c3efa5 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -74,6 +74,10 @@ def analyse(url, path): print urllib2.unquote(url) to_print = 'SQLInjection;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL") publisher.warning(to_print) + #Send to duplicate + p.populate_set_out(path, 'Duplicate') + #send to Browse_warning_paste + p.populate_set_out('sqlInjectionDetection;{}'.format(path), 'BrowseWarningPaste') else: print "Potential SQL injection:" print urllib2.unquote(url) diff --git a/bin/Url.py b/bin/Url.py index b01c2725..398ca49d 100755 --- a/bin/Url.py +++ b/bin/Url.py @@ -103,10 +103,11 @@ if __name__ == "__main__": print hostl, asn, cc, \ pycountry.countries.get(alpha2=cc).name if cc == cc_critical: - publisher.warning( - 'Url;{};{};{};Detected {} {}'.format( + to_print = 'Url;{};{};{};Detected {} {}'.format( PST.p_source, PST.p_date, PST.p_name, - hostl, cc)) + hostl, cc) + #publisher.warning(to_print) + print to_print else: print hostl, asn, cc diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 56a2f6be..546ddef7 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -31,11 +31,11 @@ publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Re [CreditCards] subscribe = Redis_CreditCard -publish = Redis_Duplicate,Redis_ModuleStats +publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste [Mail] subscribe = Redis_Mail -publish = Redis_Duplicate,Redis_ModuleStats +publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste [Onion] subscribe = Redis_Onion @@ -54,27 +54,36 @@ subscribe = Redis_Url [SQLInjectionDetection] subscribe = Redis_Url +publish = Redis_BrowseWarningPaste,Redis_Duplicate [ModuleStats] subscribe = Redis_ModuleStats +[Browse_warning_paste] +subscribe = Redis_BrowseWarningPaste + +#[send_to_queue] +#subscribe = Redis_Cve +#publish = Redis_BrowseWarningPaste + [Release] subscribe = Redis_Global [Credential] subscribe = Redis_Credential -publish = Redis_Duplicate,Redis_ModuleStats +publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste [Cve] subscribe = Redis_Cve +publish = Redis_Browse_warning_paste,Redis_Duplicate [Phone] subscribe = Redis_Global -publish = Redis_Duplicate +publish = Redis_Duplicate,Redis_BrowseWarningPaste [SourceCode] subscribe = Redis_SourceCode [Keys] subscribe = Redis_Global -publish = Redis_Duplicate +publish = Redis_Duplicate,Redis_BrowseWarningPaste diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 33f85c0e..ee2735b3 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -44,6 +44,10 @@ r_serv_charts = redis.StrictRedis( port=cfg.getint("Redis_Level_DB_Trending", "port"), db=cfg.getint("Redis_Level_DB_Trending", "db")) +r_serv_db = redis.StrictRedis( + host=cfg.get("Redis_Level_DB", "host"), + port=cfg.getint("Redis_Level_DB", "port"), + db=cfg.getint("Redis_Level_DB", "db")) app = Flask(__name__, static_url_path='/static/') @@ -157,9 +161,11 @@ def showpaste(content_range): return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list) -def getPastebyType(module_name): +def getPastebyType(server, module_name): all_path = [] - all_path.append("/home/mokaddem/AIL-framework/PASTES/archive/paste.debian.net/2016/06/30/771058.gz") + for path in server.smembers('WARNING_'+module_name): + #all_path.append("/home/mokaddem/AIL-framework/PASTES/archive/paste.debian.net/2016/06/30/771058.gz") + all_path.append(path) return all_path @@ -377,13 +383,19 @@ def trending(): @app.route("/browseImportantPaste/", methods=['GET']) def browseImportantPaste(): module_name = request.args.get('moduleName') + return render_template("browse_important_paste.html") + + +@app.route("/importantPasteByModule/", methods=['GET']) +def importantPasteByModule(): + module_name = request.args.get('moduleName') all_content = [] paste_date = [] paste_linenum = [] all_path = [] - for path in getPastebyType(module_name): + for path in getPastebyType(r_serv_db, module_name): all_path.append(path) paste = Paste.Paste(path) content = paste.get_p_content().decode('utf8', 'ignore') @@ -394,9 +406,7 @@ def browseImportantPaste(): paste_date.append(curr_date) paste_linenum.append(paste.get_lines_info()[0]) - return render_template("browse_important_paste.html", all_path=all_path, content=all_content, paste_date=paste_date, paste_linenum=paste_linenum, char_to_display=max_preview_modal) - - + return render_template("important_paste_by_module.html", all_path=all_path, content=all_content, paste_date=paste_date, paste_linenum=paste_linenum, char_to_display=max_preview_modal) @app.route("/moduletrending/") def moduletrending(): diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index d3ddc31b..0e51f95e 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -29,6 +29,7 @@
  • Dashboard
  • Trending charts
  • Modules statistics
  • +
  • Browse important pastes
  • diff --git a/var/www/templates/Trending.html b/var/www/templates/Trending.html index 26493a87..f27cad84 100644 --- a/var/www/templates/Trending.html +++ b/var/www/templates/Trending.html @@ -33,6 +33,7 @@
  • Dashboard
  • Trending charts
  • Modules statistics
  • +
  • Browse important pastes
  • diff --git a/var/www/templates/browse_important_paste.html b/var/www/templates/browse_important_paste.html index d9c2de8b..881096f4 100644 --- a/var/www/templates/browse_important_paste.html +++ b/var/www/templates/browse_important_paste.html @@ -16,7 +16,7 @@ - +