diff --git a/README.md b/README.md
index 4dcaf8af..0dd7a325 100644
--- a/README.md
+++ b/README.md
@@ -104,6 +104,7 @@ LICENSE
 Copyright (C) 2014-2016 CIRCL - Computer Incident Response Center Luxembourg (c/o smile, security made in Lëtzebuerg, Groupement d'Intérêt Economique)
 Copyright (c) 2014-2016 Raphaël Vinot
 Copyright (c) 2014-2016 Alexandre Dulaunoy
+Copyright (c) 2016 Sami Mokaddem

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by

diff --git a/bin/CreditCard.py b/bin/CreditCard.py
index 42eefd09..430c6d7a 100755
--- a/bin/CreditCard.py
+++ b/bin/CreditCard.py
@@ -66,13 +66,12 @@ if __name__ == "__main__":
                 publisher.warning('{}Checked {} valid number(s)'.format(
                     to_print, len(creditcard_set)))
                 #Send to duplicate
-                p.populate_set_out(filepath, 'Redis_Duplicate')
+                p.populate_set_out(filepath, 'Duplicate')
                 #send to Browse_warning_paste
                 p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste')
             else:
                 publisher.info('{}CreditCard related'.format(to_print))
         else:
             publisher.debug("Script creditcard is idling 1m")
-            print 'Sleeping'
             time.sleep(10)
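Aside: the warning above counts "valid number(s)", i.e. candidate digit strings that pass a checksum before the paste path is dispatched to the Duplicate and BrowseWarningPaste queues. The standard test for card numbers is Luhn's checksum, which is presumably what the validity check here amounts to; a minimal editor's sketch, not code from this patch:

    # Luhn checksum: double every second digit from the right, subtract 9
    # when doubling overflows, and require the total to be divisible by 10.
    def luhn_valid(number):
        digits = [int(d) for d in str(number)]
        for i in range(len(digits) - 2, -1, -2):
            doubled = digits[i] * 2
            digits[i] = doubled - 9 if doubled > 9 else doubled
        return sum(digits) % 10 == 0

    print luhn_valid('4532015112830366')  # True  (well-formed test number)
    print luhn_valid('4532015112830367')  # False (checksum broken)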
diff --git a/bin/Curve.py b/bin/Curve.py
index 4c3378ea..59557881 100755
--- a/bin/Curve.py
+++ b/bin/Curve.py
@@ -1,9 +1,6 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-The ZMQ_Sub_Curve Module
-============================
-
 This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.

 This module updates a .csv file used to draw curves representing selected
@@ -14,6 +11,12 @@ words and their occurrence per day.

 ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
 the same Subscriber name in both of them.
+
+This module is also used for term frequency.
+
+/!\ Top set management is done in the module Curve_manage_top_sets
+
+
 Requirements
 ------------

 *Need running Redis instances. (Redis)

@@ -28,9 +31,39 @@
 from pubsublogger import publisher
 from packages import lib_words
 import os
 import datetime
+import calendar

 from Helper import Process

+# Config Variables
+BlackListTermsSet_Name = "BlackListSetTermSet"
+TrackedTermsSet_Name = "TrackedSetTermSet"
+top_term_freq_max_set_cardinality = 20 # Max cardinality of the term frequency sets
+oneDay = 60*60*24
+top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
+top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
+top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
+top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month]
+
+
+def check_if_tracked_term(term, path):
+    if term in server_term.smembers(TrackedTermsSet_Name):
+        #add_paste to tracked_word_set
+        set_name = "tracked_" + term
+        server_term.sadd(set_name, path)
+        print term, 'added', set_name, '->', path
+        p.populate_set_out("New Term added", 'CurveManageTopSets')
+
+
+def getValueOverRange(word, startDate, num_day):
+    to_return = 0
+    for timestamp in range(startDate, startDate - num_day*oneDay, -oneDay):
+        value = server_term.hget(timestamp, word)
+        to_return += int(value) if value is not None else 0
+    return to_return
+
+
+
 if __name__ == "__main__":
     publisher.port = 6380
     publisher.channel = "Script"
@@ -44,6 +77,11 @@ if __name__ == "__main__":
         port=p.config.get("Redis_Level_DB_Curve", "port"),
         db=p.config.get("Redis_Level_DB_Curve", "db"))

+    server_term = redis.StrictRedis(
+        host=p.config.get("Redis_Level_DB_TermFreq", "host"),
+        port=p.config.get("Redis_Level_DB_TermFreq", "port"),
+        db=p.config.get("Redis_Level_DB_TermFreq", "db"))
+
     # FUNCTIONS #
     publisher.info("Script Curve started")

@@ -56,22 +94,40 @@ if __name__ == "__main__":
     message = p.get_from_set()
     prec_filename = None
     generate_new_graph = False
+
+    # Term Frequency
+    top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
+    top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
+    top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
+
     while True:
+
         if message is not None:
             generate_new_graph = True

             filename, word, score = message.split()
             temp = filename.split('/')
             date = temp[-4] + temp[-3] + temp[-2]
+            timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
+            curr_set = top_termFreq_setName_day[0] + str(timestamp)
+
             low_word = word.lower()
-            prev_score = r_serv1.hget(low_word, date)
-            if prev_score is not None:
-                r_serv1.hset(low_word, date, int(prev_score) + int(score))
-            else:
-                r_serv1.hset(low_word, date, score)
+            #Old curve with words in file
+            r_serv1.hincrby(low_word, date, int(score))
+
+            # Update redis
+            curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
+
+            # Add in set only if the term is not in the blacklist
+            if low_word not in server_term.smembers(BlackListTermsSet_Name):
+                server_term.zincrby(curr_set, low_word, float(score))
+
+                #Add more info for tracked terms
+                check_if_tracked_term(low_word, filename)
         else:
+
             if generate_new_graph:
                 generate_new_graph = False
                 print 'Building graph'
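For reference, the new update path keys everything on the UTC-midnight timestamp derived from the paste's date folders: one Redis hash per day mapping term to count, plus one per-day sorted set acting as a leaderboard. A condensed sketch of just that path, assuming a paste path laid out as .../year/month/day/... and the Redis instance configured under Redis_Level_DB_TermFreq:

    import calendar
    import redis

    server_term = redis.StrictRedis(host='localhost', port=6382, db=2)

    def update_term(filename, word, score):
        temp = filename.split('/')
        # UTC midnight of the paste's date folder (year/month/day)
        timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0))
        low_word = word.lower()
        # per-day term counts: hash keyed by timestamp
        server_term.hincrby(timestamp, low_word, int(score))
        # per-day leaderboard, later merged by Curve_manage_top_sets.py
        server_term.zincrby('TopTermFreq_set_day_' + str(timestamp), low_word, float(score))

(zincrby is called with the redis-py 2.x StrictRedis argument order used throughout this patch.)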
diff --git a/bin/Curve_manage_top_sets.py b/bin/Curve_manage_top_sets.py
new file mode 100755
index 00000000..34c1c238
--- /dev/null
+++ b/bin/Curve_manage_top_sets.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+"""
+
+This module manages the top sets for term frequency.
+Every 'refresh_rate' seconds, it updates the weekly and monthly sets.
+
+
+Requirements
+------------
+
+*Need running Redis instances. (Redis)
+*Categories files of words in /files/ need to be created
+*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
+
+"""
+
+import redis
+import time
+import copy
+from pubsublogger import publisher
+from packages import lib_words
+import datetime
+import calendar
+
+from Helper import Process
+
+# Config Variables
+Refresh_rate = 60*5 #sec
+BlackListTermsSet_Name = "BlackListSetTermSet"
+TrackedTermsSet_Name = "TrackedSetTermSet"
+top_term_freq_max_set_cardinality = 20 # Max cardinality of the term frequency sets
+oneDay = 60*60*24
+num_day_month = 31
+num_day_week = 7
+
+top_termFreq_setName_day = ["TopTermFreq_set_day_", 1]
+top_termFreq_setName_week = ["TopTermFreq_set_week", 7]
+top_termFreq_setName_month = ["TopTermFreq_set_month", 31]
+top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month]
+
+
+def manage_top_set():
+    startDate = datetime.datetime.now()
+    startDate = startDate.replace(hour=0, minute=0, second=0, microsecond=0)
+    startDate = calendar.timegm(startDate.timetuple())
+
+    dico = {}
+
+    # Retrieve top data (2*max_card) from the daily sets
+    for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
+        curr_set = top_termFreq_setName_day[0] + str(timestamp)
+        array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2)
+
+        for word, value in array_top_day:
+            if word not in server_term.smembers(BlackListTermsSet_Name):
+                if word in dico.keys():
+                    dico[word] += value
+                else:
+                    dico[word] = value
+
+        if timestamp == startDate - num_day_week*oneDay:
+            dico_week = copy.deepcopy(dico)
+
+    # convert dico into a sorted array
+    array_month = []
+    for w, v in dico.iteritems():
+        array_month.append((w, v))
+    array_month.sort(key=lambda tup: -tup[1])
+    array_month = array_month[0:20]
+
+    array_week = []
+    for w, v in dico_week.iteritems():
+        array_week.append((w, v))
+    array_week.sort(key=lambda tup: -tup[1])
+    array_week = array_week[0:20]
+
+    # remove every term from the top sets
+    for curr_set, curr_num_day in top_termFreq_set_array[1:3]:
+        for w in server_term.zrange(curr_set, 0, -1):
+            server_term.zrem(curr_set, w)
+
+    # Add the top terms from the sorted arrays to their respective sorted sets
+    for elem in array_week:
+        server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0])
+
+    for elem in array_month:
+        server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0])
+
+
+
+if __name__ == '__main__':
+    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
+    # Port of the redis instance used by pubsublogger
+    publisher.port = 6380
+    # Script is the default channel used for the modules.
+    publisher.channel = 'Script'
+
+    config_section = 'CurveManageTopSets'
+    p = Process(config_section)
+
+    server_term = redis.StrictRedis(
+        host=p.config.get("Redis_Level_DB_TermFreq", "host"),
+        port=p.config.get("Redis_Level_DB_TermFreq", "port"),
+        db=p.config.get("Redis_Level_DB_TermFreq", "db"))
+
+    publisher.info("Script Curve_manage_top_set started")
+
+    # Send to the logging a description of the module
+    publisher.info("Manage the top sets with the data created by the module curve.")
+
+    manage_top_set()
+
+    while True:
+        # Get one message from the input queue (module only works if linked with a queue)
+        message = p.get_from_set()
+        if message is None:
+            publisher.debug("{} queue is empty, waiting".format(config_section))
+            print 'sleeping'
+            time.sleep(Refresh_rate) # sleep a long time then manage the set
+            manage_top_set()
+            continue
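manage_top_set() merges the last 31 daily leaderboards client-side, which lets it skip blacklisted terms before rewriting the weekly and monthly top-20. If the blacklist were enforced at insertion time only, Redis could do the merge server-side instead; a sketch of that alternative (an editor's illustration of a design option, not what the patch does):

    import calendar
    import datetime
    import redis

    server_term = redis.StrictRedis(host='localhost', port=6382, db=2)
    oneDay = 60*60*24

    def rebuild_top_set(dest, num_day):
        start = calendar.timegm(datetime.date.today().timetuple())  # UTC midnight
        keys = ['TopTermFreq_set_day_' + str(ts)
                for ts in range(start, start - num_day*oneDay, -oneDay)]
        server_term.zunionstore(dest, keys)        # scores are summed by default
        server_term.zremrangebyrank(dest, 0, -21)  # keep only the top 20

    rebuild_top_set('TopTermFreq_set_week', 7)
    rebuild_top_set('TopTermFreq_set_month', 31)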
diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh
index 7f1220c9..fef9c3ce 100755
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@@ -58,6 +58,10 @@ function launching_redis {
     screen -S "Redis" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x'
     sleep 0.1
     screen -S "Redis" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x'
+
+    # For Words and curves
+    sleep 0.1
+    screen -S "Redis" -X screen -t "6382" bash -c 'redis-server '$conf_dir'6382.conf ; read x'
 }

 function launching_lvldb {
@@ -134,6 +138,8 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
     sleep 0.1
+    screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
+    sleep 0.1
     screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
@@ -151,6 +157,9 @@
     screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
+
 }

 #If no params, display the help

diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py
index 32c631ed..648649f7 100755
--- a/bin/ModuleStats.py
+++ b/bin/ModuleStats.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    Template for new modules
+    This module computes statistics for some modules and providers
+
 """

 import time
@@ -15,7 +16,7 @@ from Helper import Process
 from packages import Paste

 # Config Var
-max_set_cardinality = 7
+max_set_cardinality = 8

 def get_date_range(num_day):
     curr_date = datetime.date.today()
@@ -30,14 +31,10 @@ def get_date_range(num_day):

 def compute_most_posted(server, message):
     module, num, keyword, paste_date = message.split(';')

-    redis_progression_name_set = 'top_'+ module +'_set'
+    redis_progression_name_set = 'top_'+ module +'_set_' + paste_date

     # Add/Update in Redis
-    prev_score = server.hget(paste_date, module+'-'+keyword)
-    if prev_score is not None:
-        ok = server.hset(paste_date, module+'-'+keyword, int(prev_score) + int(num))
-    else:
-        ok = server.hset(paste_date, module+'-'+keyword, int(num))
+    server.hincrby(paste_date, module+'-'+keyword, int(num))

     # Compute Most Posted
     date = get_date_range(0)[0]
@@ -47,103 +44,76 @@
         curr_value = server.hget(date, module+'-'+keyword)
         keyword_total_sum += int(curr_value) if curr_value is not None else 0

-    if keyword in server.smembers(redis_progression_name_set): # if it is already in the set
-        return
+    if server.zcard(redis_progression_name_set) < max_set_cardinality:
+        server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)

-    if (server.scard(redis_progression_name_set) < max_set_cardinality):
-        server.sadd(redis_progression_name_set, keyword)
-
-    else: #not in the set
-        #Check value for all members
-        member_set = []
-        for keyw in server.smembers(redis_progression_name_set):
-            keyw_value = server.hget(paste_date, module+'-'+keyw)
-            if keyw_value is not None:
-                member_set.append((keyw, int(keyw_value)))
-            else: #No data for this set for today
-                member_set.append((keyw, int(0)))
-        member_set.sort(key=lambda tup: tup[1])
-        if len(member_set) > 0:
-            if member_set[0][1] < keyword_total_sum:
-                #remove min from set and add the new one
-                print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
-                server.srem(redis_progression_name_set, member_set[0][0])
-                server.sadd(redis_progression_name_set, keyword)
+    else: # not in set
+        member_set = server.zrangebyscore(redis_progression_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
+        # member_set is a list of (member, score) pairs
+        if int(member_set[0][1]) < keyword_total_sum:
+            #remove min from set and add the new one
+            print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
+            server.zrem(redis_progression_name_set, member_set[0][0])
+            server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
+    print redis_progression_name_set


-def compute_provider_info(server, path):
+def compute_provider_info(server_trend, server_pasteName, path):
+    redis_all_provider = 'all_provider_set'

-    redis_avg_size_name_set = 'top_size_set'
-    redis_providers_name_set = 'providers_set'

     paste = Paste.Paste(path)
+    paste_baseName = paste.p_name.split('.')[0]
     paste_size = paste._get_p_size()
     paste_provider = paste.p_source
-    paste_date = paste._get_p_date()
-    new_avg = paste_size
+    paste_date = str(paste._get_p_date())
+    redis_sum_size_set = 'top_size_set_' + paste_date
+    redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
+    redis_providers_name_set = 'providers_set_' + paste_date

     # Add/Update in Redis
-    prev_num_paste = server.hget(paste_provider+'_num', paste_date)
-    if prev_num_paste is not None:
-        ok = server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1)
-        prev_sum_size = server.hget(paste_provider+'_size', paste_date)
-
-        if prev_sum_size is not None:
-            ok = server.hset(paste_provider+'_size', paste_date, float(prev_sum_size)+paste_size)
-            new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1)
-        else:
-            ok = server.hset(paste_provider+'_size', paste_date, paste_size)
+    server_pasteName.sadd(paste_baseName, path)
+    server_trend.sadd(redis_all_provider, paste_provider)
+
+    num_paste = int(server_trend.hincrby(paste_provider+'_num', paste_date, 1))
+    sum_size = float(server_trend.hincrbyfloat(paste_provider+'_size', paste_date, paste_size))
+    new_avg = float(sum_size) / float(num_paste)
+    server_trend.hset(paste_provider +'_avg', paste_date, new_avg)

-    else:
-        ok = server.hset(paste_provider+'_num', paste_date, 1)
-        prev_num_paste = 0

     #
     # Compute Most Posted
     #

     # Size
-    if paste_provider not in server.smembers(redis_avg_size_name_set): # if it is already in the set
-        if (server.scard(redis_avg_size_name_set) < max_set_cardinality):
-            server.sadd(redis_avg_size_name_set, paste_provider)
+    if server_trend.zcard(redis_sum_size_set) < max_set_cardinality or server_trend.zscore(redis_sum_size_set, paste_provider) is not None:
+        server_trend.zadd(redis_sum_size_set, float(num_paste), paste_provider)
+        server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
+    else: #set full capacity
+        member_set = server_trend.zrangebyscore(redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
+        # member_set is a list of (member, score) pairs
+        if float(member_set[0][1]) < new_avg:
+            #remove min from set and add the new one
+            print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
+            server_trend.zrem(redis_sum_size_set, member_set[0][0])
+            server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
+            server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
+            server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)

-    else: #set full capacity
-        #Check value for all members
-        member_set = []
-        for provider in server.smembers(redis_avg_size_name_set):
-            curr_avg = 0.0
-            curr_size = server.hget(provider+'_size', paste_date)
-            curr_num = server.hget(provider+'_num', paste_date)
-            if (curr_size is not None) and (curr_num is not None):
-                curr_avg = float(curr_size) / float(curr_num)
-            member_set.append((provider, curr_avg))
-        member_set.sort(key=lambda tup: tup[1])
-        if member_set[0][1] < new_avg:
-            #remove min from set and add the new one
-            print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
-            server.srem(redis_avg_size_name_set, member_set[0][0])
-            server.sadd(redis_avg_size_name_set, paste_provider)

     # Num
-    if paste_provider not in server.smembers(redis_providers_name_set): # if it is already in the set
-        if (server.scard(redis_providers_name_set) < max_set_cardinality):
-            server.sadd(redis_providers_name_set, paste_provider)
+    # if set not full or provider already present
+    if server_trend.zcard(redis_providers_name_set) < max_set_cardinality or server_trend.zscore(redis_providers_name_set, paste_provider) is not None:
+        server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
+    else: #set at full capacity
+        member_set = server_trend.zrangebyscore(redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
+        # member_set is a list of (member, score) pairs
+        if int(member_set[0][1]) < num_paste:
+            #remove min from set and add the new one
+            print 'Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
+            server_trend.zrem(redis_providers_name_set, member_set[0][0])
+            server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)

-    else: #set full capacity
-        #Check value for all members
-        member_set = []
-        for provider in server.smembers(redis_providers_name_set):
-            curr_num = 0
-            curr_num = server.hget(provider+'_num', paste_date)
-            if curr_num is not None:
-                member_set.append((provider, int(curr_num)))
-        member_set.sort(key=lambda tup: tup[1])
-        if len(member_set) > 0:
-            if member_set[0][1] < int(prev_num_paste)+1:
-                #remove min from set and add the new one
-                print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
-                server.srem(redis_providers_name_set, member_set[0][0])
-                server.sadd(redis_providers_name_set, paste_provider)
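Both the Size and Num branches above are instances of the same capped-leaderboard idiom: ZADD while the sorted set is below max_set_cardinality (or the member is already present), otherwise evict the current minimum only if the newcomer's score beats it. Factored out, with the same redis-py StrictRedis argument order as the patch, the pattern is roughly (an editor's distillation, not a helper the patch defines):

    def zadd_bounded(server, set_name, member, score, max_card):
        # insert while below capacity, or refresh a member already present
        if server.zcard(set_name) < max_card or server.zscore(set_name, member) is not None:
            server.zadd(set_name, float(score), member)
            return
        # otherwise evict the current minimum if the newcomer scores higher
        lowest = server.zrangebyscore(set_name, '-inf', '+inf', withscores=True, start=0, num=1)
        if lowest and lowest[0][1] < score:
            server.zrem(set_name, lowest[0][0])
            server.zadd(set_name, float(score), member)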


 if __name__ == '__main__':
     # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@@ -167,6 +137,11 @@ if __name__ == '__main__':
         port=p.config.get("Redis_Level_DB_Trending", "port"),
         db=p.config.get("Redis_Level_DB_Trending", "db"))

+    r_serv_pasteName = redis.StrictRedis(
+        host=p.config.get("Redis_Paste_Name", "host"),
+        port=p.config.get("Redis_Paste_Name", "port"),
+        db=p.config.get("Redis_Paste_Name", "db"))
+
     # Endless loop getting messages from the input queue
     while True:
         # Get one message from the input queue
@@ -183,4 +158,4 @@ if __name__ == '__main__':
         if len(message.split(';')) > 1:
             compute_most_posted(r_serv_trend, message)
         else:
-            compute_provider_info(r_serv_trend, message)
+            compute_provider_info(r_serv_trend, r_serv_pasteName, message)

diff --git a/bin/SentimentAnalyser.py b/bin/SentimentAnalyser.py
new file mode 100755
index 00000000..09f59e40
--- /dev/null
+++ b/bin/SentimentAnalyser.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+"""
+    Sentiment analyser module.
+    It takes its inputs from 'global'.
+
+    The content analysed comes from the paste, with lines whose length is
+    above a defined threshold removed (get_p_content_with_removed_lines).
+    This is done because the NLTK sentence tokenizer (sent_tokenize) seems to crash
+    on long lines (function _slices_from_text, line #1276).
+
+
+    nltk.sentiment.vader module credit:
+        Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
+
+"""
+
+import time
+import datetime
+import calendar
+import redis
+import json
+from pubsublogger import publisher
+from Helper import Process
+from packages import Paste
+
+from nltk.sentiment.vader import SentimentIntensityAnalyzer
+from nltk import tokenize
+
+# Config Variables
+accepted_Mime_type = ['text/plain']
+size_threshold = 250
+line_max_length_threshold = 1000
+
+def Analyse(message, server):
+    path = message
+    paste = Paste.Paste(path)
+
+    # get content with removed lines + the number of them
+    num_line_removed, p_content = paste.get_p_content_with_removed_lines(line_max_length_threshold)
+    provider = paste.p_source
+    p_date = str(paste._get_p_date())
+    p_MimeType = paste._get_p_encoding()
+
+    # Perform further analysis
+    if p_MimeType == "text/plain":
+        if isJSON(p_content):
+            p_MimeType = "JSON"
+
+    if p_MimeType in accepted_Mime_type:
+
+        the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
+        the_time = datetime.datetime.now()
+        the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
+        combined_datetime = datetime.datetime.combine(the_date, the_time)
+        timestamp = calendar.timegm(combined_datetime.timetuple())
+
+        sentences = tokenize.sent_tokenize(p_content.decode('utf-8', 'ignore'))
+
+        if len(sentences) > 0:
+            avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
+            neg_line = 0
+            pos_line = 0
+            sid = SentimentIntensityAnalyzer()
+            for sentence in sentences:
+                ss = sid.polarity_scores(sentence)
+                for k in sorted(ss):
+                    if k == 'compound':
+                        if ss['neg'] > ss['pos']:
+                            avg_score['compoundNeg'] += ss[k]
+                            neg_line += 1
+                        else:
+                            avg_score['compoundPos'] += ss[k]
+                            pos_line += 1
+                    else:
+                        avg_score[k] += ss[k]
+
+
+            for k in avg_score:
+                if k == 'compoundPos':
+                    avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
+                elif k == 'compoundNeg':
+                    avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
+                else:
+                    avg_score[k] = avg_score[k] / len(sentences)
+
+
+            # In redis-levelDB: {} = set, () = K-V
+            # {Provider_set -> provider_i}
+            # {Provider_TimestampInHour_i -> UniqID_i}_j
+            # (UniqID_i -> PasteValue_i)
+
+            server.sadd('Provider_set', provider)
+
+            provider_timestamp = provider + '_' + str(timestamp)
+            server.incr('UniqID')
+            UniqID = server.get('UniqID')
+            print provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines'
+            server.sadd(provider_timestamp, UniqID)
+            server.set(UniqID, avg_score)
+        else:
+            print 'Dropped:', p_MimeType
+
+
+def isJSON(content):
+    try:
+        json.loads(content)
+        return True
+
+    except Exception,e:
+        return False
+
+if __name__ == '__main__':
+    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
+    # Port of the redis instance used by pubsublogger
+    publisher.port = 6380
+    # Script is the default channel used for the modules.
+    publisher.channel = 'Script'
+
+    # Section name in bin/packages/modules.cfg
+    config_section = 'SentimentAnalysis'
+
+    # Setup the I/O queues
+    p = Process(config_section)
+
+    # Send to the logging a description of the module
+    publisher.info("")
+
+    # REDIS_LEVEL_DB #
+    server = redis.StrictRedis(
+        host=p.config.get("Redis_Level_DB_Sentiment", "host"),
+        port=p.config.get("Redis_Level_DB_Sentiment", "port"),
+        db=p.config.get("Redis_Level_DB_Sentiment", "db"))
+
+    while True:
+        message = p.get_from_set()
+        if message is None:
+            publisher.debug("{} queue is empty, waiting".format(config_section))
+            time.sleep(1)
+            continue
+
+        Analyse(message, server)
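As background on the scoring loop: VADER returns, per sentence, a dict of neg/neu/pos ratios plus a normalized compound value in [-1, 1]; the module splits the compound into compoundPos/compoundNeg depending on whether neg or pos dominates. A standalone snippet showing the raw scorer output (assumes the NLTK vader_lexicon and punkt resources have been downloaded):

    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    from nltk import tokenize

    sid = SentimentIntensityAnalyzer()
    text = "I love this tool. This crash is terrible."
    for sentence in tokenize.sent_tokenize(text):
        ss = sid.polarity_scores(sentence)
        # e.g. {'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'compound': 0.6369}
        bucket = 'compoundNeg' if ss['neg'] > ss['pos'] else 'compoundPos'
        print sentence, bucket, ss['compound']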
diff --git a/bin/WebStats.py b/bin/WebStats.py
index 837fa2fe..6fdd9ee3 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -23,16 +23,11 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
 def analyse(server, field_name, date, url_parsed):
     field = url_parsed[field_name]
     if field is not None:
-        prev_score = server.hget(field, date)
-        if prev_score is not None:
-            server.hset(field, date, int(prev_score) + 1)
-
-        else:
-            server.hset(field, date, 1)
-            if field_name == "domain": #save domain in a set for the monthly plot
-                domain_set_name = "domain_set_" + date[0:6]
-                server.sadd(domain_set_name, field)
-                print "added in " + domain_set_name +": "+ field
+        server.hincrby(field, date, 1)
+        if field_name == "domain": #save domain in a set for the monthly plot
+            domain_set_name = "domain_set_" + date[0:6]
+            server.sadd(domain_set_name, field)
+            print "added in " + domain_set_name +": "+ field

 def get_date_range(num_day):
     curr_date = datetime.date.today()
@@ -81,9 +76,10 @@ def compute_progression(server, field_name, num_day, url_parsed):
         #Check value for all members
         member_set = []
         for keyw in server.smembers(redis_progression_name_set):
-            member_set += (keyw, int(server.hget(redis_progression_name, keyw)))
+            member_set.append((keyw, int(server.hget(redis_progression_name, keyw))))
+        print member_set
         member_set.sort(key=lambda tup: tup[1])
-        if member_set[0] < keyword_increase:
+        if member_set[0][1] < keyword_increase:
             #remove min from set and add the new one
             server.srem(redis_progression_name_set, member_set[0])
             server.sadd(redis_progression_name_set, keyword)
@@ -106,11 +102,6 @@ if __name__ == '__main__':
     publisher.info("Makes statistics about valid URL")

     # REDIS #
-    r_serv1 = redis.StrictRedis(
-        host=p.config.get("Redis_Level_DB", "host"),
-        port=p.config.get("Redis_Level_DB", "port"),
-        db=p.config.get("Redis_Level_DB", "db"))
-
     r_serv_trend = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Trending", "host"), port=p.config.get("Redis_Level_DB_Trending", "port"), diff --git a/bin/empty_queue.py b/bin/empty_queue.py index a5ccae68..f1b3c453 100755 --- a/bin/empty_queue.py +++ b/bin/empty_queue.py @@ -24,6 +24,7 @@ if __name__ == "__main__": publisher.channel = "Script" config_section = ['Global', 'Duplicates', 'Indexer', 'Attributes', 'Lines', 'DomClassifier', 'Tokenize', 'Curve', 'Categ', 'CreditCards', 'Mail', 'Onion', 'DumpValidOnion', 'Web', 'WebStats', 'Release', 'Credential', 'Cve', 'Phone', 'SourceCode', 'Keys'] + config_section = ['Curve'] for queue in config_section: print 'dropping: ' + queue diff --git a/bin/launch_lvldb.sh b/bin/launch_lvldb.sh index 868a18e3..ef4dcb2c 100755 --- a/bin/launch_lvldb.sh +++ b/bin/launch_lvldb.sh @@ -7,13 +7,17 @@ lvdbhost='127.0.0.1' lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/" db1_y='2013' db2_y='2014' -db2_y='2015' -db2_y='2016' +db3_y='2016' +db4_y='3016' nb_db=13 screen -dmS "LevelDB" sleep 0.1 echo -e $GREEN"\t* Launching Levels DB servers"$DEFAULT -#Add lines here with appropriates options. -screen -S "LevelDB" -X screen -t "2016" bash -c '../redis-leveldb/redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2016/ -P '$db2_y' -M '$nb_db'; read x' +sleep 0.1 +screen -S "LevelDB" -X screen -t "2016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2016/ -P '$db3_y' -M '$nb_db'; read x' + +# For Curve +sleep 0.1 +screen -S "LevelDB" -X screen -t "3016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'3016/ -P '$db4_y' -M '$nb_db'; read x' diff --git a/bin/launch_redis.sh b/bin/launch_redis.sh index ffdbdb58..c7af27c8 100755 --- a/bin/launch_redis.sh +++ b/bin/launch_redis.sh @@ -14,3 +14,6 @@ screen -S "Redis" -X screen -t "6380" bash -c '../redis/src/redis-server '$conf_ sleep 0.1 screen -S "Redis" -X screen -t "6381" bash -c '../redis/src/redis-server '$conf_dir'6381.conf ; read x' +# For Words and curves +sleep 0.1 +screen -S "Redis" -X screen -t "6382" bash -c '../redis/src/redis-server '$conf_dir'6382.conf ; read x' diff --git a/bin/launch_scripts.sh b/bin/launch_scripts.sh index 33930f6f..ad55244a 100755 --- a/bin/launch_scripts.sh +++ b/bin/launch_scripts.sh @@ -9,8 +9,8 @@ sleep 0.1 echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' -#sleep 0.1 -#screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' sleep 0.1 @@ -34,6 +34,24 @@ screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' sleep 0.1 +screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' +sleep 0.1 screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' sleep 0.1 +screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' +sleep 0.1 screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t 
"SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' +sleep 0.1 +screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index f03114f1..d91018bd 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -91,6 +91,7 @@ class Paste(object): self.p_langage = None self.p_nb_lines = None self.p_max_length_line = None + self.array_line_above_threshold = None self.p_duplicate = None def get_p_content(self): @@ -118,6 +119,21 @@ class Paste(object): def get_p_content_as_file(self): return cStringIO.StringIO(self.get_p_content()) + def get_p_content_with_removed_lines(self, threshold): + num_line_removed = 0 + line_length_threshold = threshold + string_content = "" + f = self.get_p_content_as_file() + line_id = 0 + for line_id, line in enumerate(f): + length = len(line) + if length < line_length_threshold: + string_content += line + else: + num_line_removed+=1 + + return (num_line_removed, string_content) + def get_lines_info(self): """ Returning and setting the number of lines and the maximum lenght of the @@ -136,9 +152,11 @@ class Paste(object): length = len(line) if length >= max_length_line: max_length_line = length + f.close() self.p_nb_lines = line_id self.p_max_length_line = max_length_line + return (self.p_nb_lines, self.p_max_length_line) def _get_p_encoding(self): diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 1406638d..e74b5da2 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -1,6 +1,5 @@ [Directories] bloomfilters = Blooms -#Duplicate_ssdeep dicofilters = Dicos pastes = PASTES @@ -59,11 +58,26 @@ host = localhost port = 6379 db = 1 +[Redis_Paste_Name] +host = localhost +port = 6379 +db = 2 + ##### LevelDB ##### [Redis_Level_DB_Curve] host = localhost -port = 3016 -db = 0 +port = 6382 +db = 1 + +[Redis_Level_DB_Sentiment] +host = localhost +port = 6382 +db = 4 + +[Redis_Level_DB_TermFreq] +host = localhost +port = 6382 +db = 2 [Redis_Level_DB] host = localhost @@ -72,8 +86,8 @@ db = 0 [Redis_Level_DB_Trending] host = localhost -port = 2016 -db = 0 +port = 6382 +db = 3 [Redis_Level_DB_Hashs] host = localhost diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index bdf1bd74..bf6fa695 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -24,6 +24,10 @@ publish = Redis_Words [Curve] subscribe = Redis_Words +publish = Redis_CurveManageTopSets + +[CurveManageTopSets] +subscribe = Redis_CurveManageTopSets [Categ] subscribe = Redis_Global @@ -66,6 +70,9 @@ subscribe = Redis_BrowseWarningPaste #subscribe = Redis_Cve #publish = Redis_BrowseWarningPaste +[SentimentAnalysis] +subscribe = Redis_Global + [Release] subscribe = Redis_Global diff --git a/installing_deps.sh b/installing_deps.sh index 6eecc805..805818e1 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -42,7 +42,7 @@ popd # tlsh test ! 
-d tlsh && git clone git://github.com/trendmicro/tlsh.git pushd tlsh/ -./make +./make.sh popd # REDIS LEVEL DB # diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 8e9f8c23..45efd8f1 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -5,6 +5,8 @@ import redis import ConfigParser import json import datetime +import time +import calendar from flask import Flask, render_template, jsonify, request import flask import os @@ -49,6 +51,22 @@ r_serv_db = redis.StrictRedis( port=cfg.getint("Redis_Level_DB", "port"), db=cfg.getint("Redis_Level_DB", "db")) +r_serv_sentiment = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_Sentiment", "host"), + port=cfg.getint("Redis_Level_DB_Sentiment", "port"), + db=cfg.getint("Redis_Level_DB_Sentiment", "db")) + +r_serv_term = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_TermFreq", "host"), + port=cfg.getint("Redis_Level_DB_TermFreq", "port"), + db=cfg.getint("Redis_Level_DB_TermFreq", "db")) + +r_serv_pasteName = redis.StrictRedis( + host=cfg.get("Redis_Paste_Name", "host"), + port=cfg.getint("Redis_Paste_Name", "port"), + db=cfg.getint("Redis_Paste_Name", "db")) + + app = Flask(__name__, static_url_path='/static/') @@ -106,7 +124,7 @@ def parseStringToList2(the_string): return res -def showpaste(content_range): +def showpaste(content_range): requested_path = request.args.get('paste', '') paste = Paste.Paste(requested_path) p_date = str(paste._get_p_date()) @@ -129,7 +147,7 @@ def showpaste(content_range): dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100) else: dup_list[2] = int(dup_list[2]) - + p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True) # Combine multiple duplicate paste name and format for display @@ -158,7 +176,8 @@ def showpaste(content_range): p_hashtype_list.append(hash_type) if content_range != 0: - p_content = p_content[0:content_range] + p_content = p_content[0:content_range] + return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list) @@ -181,23 +200,32 @@ def get_date_range(num_day): # Iterate over elements in the module provided and return the today data or the last data # return format: [('passed_days', num_of_passed_days), ('elem_name1', elem_value1), ('elem_name2', elem_value2)]] def get_top_relevant_data(server, module_name): - redis_progression_name_set = 'top_'+ module_name +'_set' - days = 0 + days = 0 for date in get_date_range(15): - member_set = [] - for keyw in server.smembers(redis_progression_name_set): - redis_progression_name = module_name+'-'+keyw - keyw_value = server.hget(date ,redis_progression_name) - keyw_value = keyw_value if keyw_value is not None else 0 - member_set.append((keyw, int(keyw_value))) - member_set.sort(key=lambda tup: tup[1], reverse=True) - if member_set[0][1] == 0: #No data for this date + redis_progression_name_set = 'top_'+ module_name +'_set_' + date + member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True) + if len(member_set) == 0: #No data for this date days += 1 - continue else: member_set.insert(0, ("passed_days", days)) return member_set + +def Term_getValueOverRange(word, startDate, num_day): + passed_days = 0 + oneDay = 60*60*24 + to_return = [] + curr_to_return = 0 + for timestamp in range(startDate, startDate - 
max(num_day)*oneDay, -oneDay): + value = r_serv_term.hget(timestamp, word) + curr_to_return += int(value) if value is not None else 0 + for i in num_day: + if passed_days == i-1: + to_return.append(curr_to_return) + passed_days += 1 + return to_return + + # ========= CACHE CONTROL ======== @app.after_request def add_header(response): @@ -225,19 +253,19 @@ def progressionCharts(): attribute_name = request.args.get('attributeName') trending_name = request.args.get('trendingName') bar_requested = True if request.args.get('bar') == "true" else False - + if (bar_requested): num_day = int(request.args.get('days')) bar_values = [] - date_range = get_date_range(num_day) + date_range = get_date_range(num_day) # Retreive all data from the last num_day for date in date_range: curr_value = r_serv_charts.hget(attribute_name, date) bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) bar_values.insert(0, attribute_name) return jsonify(bar_values) - + else: redis_progression_name = 'top_progression_'+trending_name redis_progression_name_set = 'top_progression_'+trending_name+'_set' @@ -252,25 +280,25 @@ def progressionCharts(): if len(member_set) == 0: member_set.append(("No relevant data", int(100))) return jsonify(member_set) - + @app.route("/_moduleCharts", methods=['GET']) def modulesCharts(): keyword_name = request.args.get('keywordName') module_name = request.args.get('moduleName') bar_requested = True if request.args.get('bar') == "true" else False - + if (bar_requested): num_day = int(request.args.get('days')) bar_values = [] - date_range = get_date_range(num_day) + date_range = get_date_range(num_day) # Retreive all data from the last num_day for date in date_range: curr_value = r_serv_charts.hget(date, module_name+'-'+keyword_name) bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) bar_values.insert(0, keyword_name) return jsonify(bar_values) - + else: member_set = get_top_relevant_data(r_serv_charts, module_name) if len(member_set) == 0: @@ -283,52 +311,33 @@ def providersChart(): keyword_name = request.args.get('keywordName') module_name = request.args.get('moduleName') bar_requested = True if request.args.get('bar') == "true" else False - + if (bar_requested): num_day = int(request.args.get('days')) bar_values = [] - date_range = get_date_range(num_day) + date_range = get_date_range(num_day) # Retreive all data from the last num_day for date in date_range: curr_value_size = r_serv_charts.hget(keyword_name+'_'+'size', date) curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date) + curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date) if module_name == "size": - curr_value_num = curr_value_num if curr_value_num is not None else 0 - curr_value_num = curr_value_num if int(curr_value_num) != 0 else 10000000000 - curr_value = float(curr_value_size if curr_value_size is not None else 0.0) / float(curr_value_num) + curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0) else: curr_value = float(curr_value_num if curr_value_num is not None else 0.0) bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], curr_value]) bar_values.insert(0, keyword_name) return jsonify(bar_values) - + else: - redis_provider_name_set = 'top_size_set' if module_name == "size" else 'providers_set' - - # Iterate over element in top_x_set and retreive their value - member_set = [] - for keyw in r_serv_charts.smembers(redis_provider_name_set): - 
redis_provider_name_size = keyw+'_'+'size' - redis_provider_name_num = keyw+'_'+'num' - keyw_value_size = r_serv_charts.hget(redis_provider_name_size, get_date_range(0)[0]) - keyw_value_size = keyw_value_size if keyw_value_size is not None else 0.0 - keyw_value_num = r_serv_charts.hget(redis_provider_name_num, get_date_range(0)[0]) - - if keyw_value_num is not None: - keyw_value_num = int(keyw_value_num) - else: - if module_name == "size": - keyw_value_num = 10000000000 - else: - keyw_value_num = 0 - if module_name == "size": - member_set.append((keyw, float(keyw_value_size)/float(keyw_value_num))) - else: - member_set.append((keyw, float(keyw_value_num))) - - member_set.sort(key=lambda tup: tup[1], reverse=True) + #redis_provider_name_set = 'top_size_set' if module_name == "size" else 'providers_set' + redis_provider_name_set = 'top_avg_size_set_' if module_name == "size" else 'providers_set_' + redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0] + + member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8) + # Member set is a list of (value, score) pairs if len(member_set) == 0: member_set.append(("No relevant data", float(100))) return jsonify(member_set) @@ -344,7 +353,22 @@ def search(): c = [] #preview of the paste content paste_date = [] paste_size = [] - # Search + + # Search filename + print r_serv_pasteName.smembers(q[0]) + for path in r_serv_pasteName.smembers(q[0]): + print path + r.append(path) + paste = Paste.Paste(path) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + c.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + paste_date.append(curr_date) + paste_size.append(paste._get_p_size()) + + # Search full line from whoosh import index from whoosh.fields import Schema, TEXT, ID schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) @@ -360,11 +384,11 @@ def search(): paste = Paste.Paste(x.items()[0][1]) content = paste.get_p_content().decode('utf8', 'ignore') content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - c.append(content[0:content_range]) + c.append(content[0:content_range]) curr_date = str(paste._get_p_date()) curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_size.append(paste._get_p_size()) + paste_date.append(curr_date) + paste_size.append(paste._get_p_size()) return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal) @@ -417,11 +441,11 @@ def importantPasteByModule(): paste = Paste.Paste(path) content = paste.get_p_content().decode('utf8', 'ignore') content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - all_content.append(content[0:content_range]) + all_content.append(content[0:content_range]) curr_date = str(paste._get_p_date()) curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_linenum.append(paste.get_lines_info()[0]) + paste_date.append(curr_date) + paste_linenum.append(paste.get_lines_info()[0]) return render_template("important_paste_by_module.html", all_path=all_path, content=all_content, paste_date=paste_date, paste_linenum=paste_linenum, char_to_display=max_preview_modal) @@ -429,6 +453,299 @@ def 
importantPasteByModule(): def moduletrending(): return render_template("Moduletrending.html") +@app.route("/sentiment_analysis_trending/") +def sentiment_analysis_trending(): + return render_template("sentiment_analysis_trending.html") + + +@app.route("/sentiment_analysis_getplotdata/") +def sentiment_analysis_getplotdata(): + # Get the top providers based on number of pastes + oneHour = 60*60 + sevenDays = oneHour*24*7 + dateStart = datetime.datetime.now() + dateStart = dateStart.replace(minute=0, second=0, microsecond=0) + dateStart_timestamp = calendar.timegm(dateStart.timetuple()) + + to_return = {} + range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8) + # if empty, get yesterday top providers + print 'providers_set_'+ get_date_range(1)[1] + range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers + # if still empty, takes from all providers + if range_providers == []: + print 'today provider empty' + range_providers = r_serv_charts.smembers('all_provider_set') + + for cur_provider in range_providers: + print cur_provider + cur_provider_name = cur_provider + '_' + list_date = {} + for cur_timestamp in range(int(dateStart_timestamp), int(dateStart_timestamp)-sevenDays-oneHour, -oneHour): + cur_set_name = cur_provider_name + str(cur_timestamp) + + list_value = [] + for cur_id in r_serv_sentiment.smembers(cur_set_name): + cur_value = r_serv_sentiment.get(cur_id) + list_value.append(cur_value) + list_date[cur_timestamp] = list_value + to_return[cur_provider] = list_date + + return jsonify(to_return) + + + +@app.route("/sentiment_analysis_plot_tool/") +def sentiment_analysis_plot_tool(): + return render_template("sentiment_analysis_plot_tool.html") + + + +@app.route("/sentiment_analysis_plot_tool_getdata/", methods=['GET']) +def sentiment_analysis_plot_tool_getdata(): + getProviders = request.args.get('getProviders') + + if getProviders == 'True': + providers = [] + for cur_provider in r_serv_charts.smembers('all_provider_set'): + providers.append(cur_provider) + return jsonify(providers) + + else: + query = request.args.get('query') + query = query.split(',') + Qdate = request.args.get('Qdate') + + date1 = (Qdate.split('-')[0]).split('.') + date1 = datetime.date(int(date1[2]), int(date1[1]), int(date1[0])) + + date2 = (Qdate.split('-')[1]).split('.') + date2 = datetime.date(int(date2[2]), int(date2[1]), int(date2[0])) + + timestamp1 = calendar.timegm(date1.timetuple()) + timestamp2 = calendar.timegm(date2.timetuple()) + + oneHour = 60*60 + oneDay = oneHour*24 + + to_return = {} + for cur_provider in query: + list_date = {} + cur_provider_name = cur_provider + '_' + for cur_timestamp in range(int(timestamp1), int(timestamp2)+oneDay, oneHour): + cur_set_name = cur_provider_name + str(cur_timestamp) + + list_value = [] + for cur_id in r_serv_sentiment.smembers(cur_set_name): + cur_value = r_serv_sentiment.get(cur_id) + list_value.append(cur_value) + list_date[cur_timestamp] = list_value + to_return[cur_provider] = list_date + + return jsonify(to_return) + + +@app.route("/terms_management/") +def terms_management(): + TrackedTermsSet_Name = "TrackedSetTermSet" + BlackListTermsSet_Name = "BlackListSetTermSet" + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp 
= calendar.timegm(today.timetuple()) + + track_list = [] + track_list_values = [] + track_list_num_of_paste = [] + for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): + track_list.append(tracked_term) + value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31]) + + term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) + + set_paste_name = "tracked_" + tracked_term + track_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + value_range.append(term_date) + track_list_values.append(value_range) + + + black_list = [] + for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name): + term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term) + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + black_list.append([blacked_term, term_date]) + + return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste) + + +@app.route("/terms_management_query_paste/") +def terms_management_query_paste(): + term = request.args.get('term') + TrackedTermsSet_Name = "TrackedSetTermSet" + paste_info = [] + + set_paste_name = "tracked_" + term + track_list_path = r_serv_term.smembers(set_paste_name) + + for path in track_list_path: + paste = Paste.Paste(path) + p_date = str(paste._get_p_date()) + p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] + p_source = paste.p_source + p_encoding = paste._get_p_encoding() + p_size = paste.p_size + p_mime = paste.p_mime + p_lineinfo = paste.get_lines_info() + p_content = paste.get_p_content().decode('utf-8', 'ignore') + if p_content != 0: + p_content = p_content[0:400] + paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) + + return jsonify(paste_info) + + +@app.route("/terms_management_query/") +def terms_management_query(): + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + term = request.args.get('term') + section = request.args.get('section') + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31]) + + if section == "followTerm": + term_date = r_serv_term.hget(TrackedTermsDate_Name, term) + elif section == "blacklistTerm": + term_date = r_serv_term.hget(BlackListTermsDate_Name, term) + + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + value_range.append(str(term_date)) + return jsonify(value_range) + + +@app.route("/terms_management_action/", methods=['GET']) +def terms_management_action(): + TrackedTermsSet_Name = "TrackedSetTermSet" + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + BlackListTermsSet_Name = "BlackListSetTermSet" + + today = datetime.datetime.now() + today = today.replace(microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + + section = request.args.get('section') + action = request.args.get('action') + term = request.args.get('term') + if action is None or term is None: + return "None" + else: + if section == "followTerm": + if action == 
"add": + r_serv_term.sadd(TrackedTermsSet_Name, term.lower()) + r_serv_term.hset(TrackedTermsDate_Name, term, today_timestamp) + else: + r_serv_term.srem(TrackedTermsSet_Name, term.lower()) + elif section == "blacklistTerm": + if action == "add": + r_serv_term.sadd(BlackListTermsSet_Name, term.lower()) + r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp) + else: + r_serv_term.srem(BlackListTermsSet_Name, term.lower()) + else: + return "None" + + to_return = {} + to_return["section"] = section + to_return["action"] = action + to_return["term"] = term + return jsonify(to_return) + + + +@app.route("/terms_plot_tool/") +def terms_plot_tool(): + term = request.args.get('term') + if term is not None: + return render_template("terms_plot_tool.html", term=term) + else: + return render_template("terms_plot_tool.html", term="") + + +@app.route("/terms_plot_tool_data/") +def terms_plot_tool_data(): + oneDay = 60*60*24 + range_start = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_start')))) if request.args.get('range_start') is not None else 0; + range_start = range_start.replace(hour=0, minute=0, second=0, microsecond=0) + range_start = calendar.timegm(range_start.timetuple()) + range_end = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0; + range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0) + range_end = calendar.timegm(range_end.timetuple()) + term = request.args.get('term') + + if term is None: + return "None" + else: + value_range = [] + for timestamp in range(range_start, range_end+oneDay, oneDay): + print timestamp, term + value = r_serv_term.hget(timestamp, term) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + return jsonify(value_range) + + +@app.route("/terms_plot_top/") +def terms_plot_top(): + return render_template("terms_plot_top.html") + + +@app.route("/terms_plot_top_data/") +def terms_plot_top_data(): + oneDay = 60*60*24 + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + set_day = "TopTermFreq_set_day_" + str(today_timestamp) + set_week = "TopTermFreq_set_week"; + set_month = "TopTermFreq_set_month"; + + the_set = request.args.get('set') + num_day = int(request.args.get('num_day')) + if the_set is None: + return "None" + else: + to_return = [] + if the_set == "TopTermFreq_set_day": + the_set += "_" + str(today_timestamp) + + for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): + position = {} + position['day'] = r_serv_term.zrevrank(set_day, term) + position['day'] = position['day']+1 if position['day'] is not None else "<20" + position['week'] = r_serv_term.zrevrank(set_week, term) + position['week'] = position['week']+1 if position['week'] is not None else "<20" + position['month'] = r_serv_term.zrevrank(set_month, term) + position['month'] = position['month']+1 if position['month'] is not None else "<20" + value_range = [] + for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): + value = r_serv_term.hget(timestamp, term) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + + to_return.append([term, value_range, tot_value, position]) + + return jsonify(to_return) + + @app.route("/showsavedpaste/") #completely shows the paste 
in a new tab def showsavedpaste(): @@ -446,7 +763,7 @@ def getmoredata(): paste = Paste.Paste(requested_path) p_content = paste.get_p_content().decode('utf-8', 'ignore') to_return = p_content[max_preview_modal-1:] - return to_return + return to_return if __name__ == "__main__": diff --git a/var/www/static/css/switch_checkbox.css b/var/www/static/css/switch_checkbox.css new file mode 100644 index 00000000..07991af3 --- /dev/null +++ b/var/www/static/css/switch_checkbox.css @@ -0,0 +1,68 @@ +.switch { + position: relative; + display: inline-block; + vertical-align: top; + width: 56px; + height: 20px; + padding: 3px; + margin-left: 5px; + border-radius: 18px; + box-shadow: inset 0 -1px #fff,inset 0 1px 1px rgba(0,0,0,0.05); + cursor: pointer; + -webkit-box-sizing: content-box; + -moz-box-sizing: content-box; + box-sizing: content-box; +} + +.switch-input { + position: absolute; + top: 0; + left: 0; + opacity: 0; +} + +.switch-input:checked ~ .switch-label { + background: #67c2ef; + box-shadow: inset 0 1px 2px rgba(0,0,0,0.15),inset 0 0 3px rgba(0,0,0,0.2); +} +.switch-label { + position: relative; + display: block; + height: inherit; + font-size: 10px; + text-transform: uppercase; + background: #f9f9f9; + border-radius: inherit; + box-shadow: inset 0 1px 2px rgba(0,0,0,0.12),inset 0 0 2px rgba(0,0,0,0.15); + -webkit-transition: .15s ease-out; + -moz-transition: .15s ease-out; + -o-transition: .15s ease-out; + transition: .15s ease-out; + -webkit-transition-property: opacity background; + -moz-transition-property: opacity background; + -o-transition-property: opacity background; + transition-property: opacity background; +} + +.switch-input:checked ~ .switch-handle { + left: 40px; + box-shadow: -1px 1px 5px rgba(0,0,0,0.2); +} +.switch-handle { + position: absolute; + top: 4px; + left: 4px; + width: 18px; + height: 18px; + background: white; + border-radius: 10px; + box-shadow: 1px 1px 5px rgba(0,0,0,0.2); + background-image: -webkit-linear-gradient(top,#fff 40%,#f0f0f0); + background-image: -moz-linear-gradient(top,#fff 40%,#f0f0f0); + background-image: -o-linear-gradient(top,#fff 40%,#f0f0f0); + background-image: linear-gradient(to bottom,#fff 40%,#f0f0f0); + -webkit-transition: left .15s ease-out; + -moz-transition: left .15s ease-out; + -o-transition: left .15s ease-out; + transition: left .15s ease-out; +} diff --git a/var/www/static/js/FlexGauge.js b/var/www/static/js/FlexGauge.js new file mode 100644 index 00000000..3aa2bf7a --- /dev/null +++ b/var/www/static/js/FlexGauge.js @@ -0,0 +1,440 @@ +/** + * FlexGauge + * Version: 1.0 + * Author: Jeff Millies + * Author URI: + * + * Slight modification for better display in Sentiment webpages + */ +(function ($) { + var FlexGauge = function (o) { + if (typeof o === 'object') { + this._extendOptions(o, false); + this._build(); + } + }; + FlexGauge.prototype = { + /** + * {String} Element that you would like to append to. ie '#idname', '.classname', 'div#idname', etc.. 
+ */ + appendTo: 'body', + /** + * {String} Id of Canvas already created or Id of canvas that will be created automatically + */ + elementId: 'canvas', + /** + * {String} Class of canvas created + */ + elementClass: 'canvas', + /** + * {Int} Canvas Width & Height + */ + elementWidth: 200, + elementHeight: 200, + /** + * {Boolean|String} Generate Dial Value for the Gauge, true will use arcFillPercent or arcFillInt + * depending on provided values and specified dialUnits, string will use specified value + */ + dialValue: false, + /** + * {String} Class applied to div when dial is generated. + */ + dialClass: 'fg-dial', + /** + * {string: %|$| } Type of unit to use for the dial + */ + dialUnit: '%', + /** + * {string: before|after} Where the dial unit will be displayed + */ + dialUnitPosition: 'after', + /** + * {Boolean|String} Generate Label for the Gauge, true will use default "FlexGauge", string will use specified + */ + dialLabel: false, + /** + * {String} Class applied to div when label is generated. + */ + dialLabelClass: 'fg-dial-label', + /** + * {Int} Radius of the arc + */ + inc: 0.0, + incTot: 1.0, + /** + * {Doule} Increment value + */ + arcSize: 85, + /** + * {double} Starting and Ending location of the arc, End always needs to be larger + * arc(x, y, radius, startAngle, endAngle, anticlockwise) + */ + arcAngleStart: 0.85, + arcAngleEnd: 2.15, + /** + * {double} Percentage the arc fills + */ + arcFillPercent: .5, + /** + * {Int} Starting and Ending values that are used to + * find a difference for amount of units + * ie: 60 (arcFillEnd) - 10 (arcFillStart) = 50 + */ + arcFillStart: null, + arcFillEnd: null, + /** + * {Int} Data used to find out what percentage of the + * arc to fill. arcFillInt can be populated by + * the difference of arcFillStart and arcFillEnd + */ + arcFillInt: null, + arcFillTotal: null, + /** + * {Int} Color lightness: 0 - 255, 0 having no white added, 255 having all white and no color + */ + arcBgColorLight: 80, + /** + * {Int} Color saturation: 0 - 100, 0 having no color, 100 is full color + */ + arcBgColorSat: 60, + /** + * {Int} Size of the line marking the percentage + */ + arcStrokeFg: 30, + /** + * {Int} Size of the container holding the line + */ + arcStrokeBg: 30, + + /** + * {string: hex} Color of the line marking the percentage + */ + colorArcFg: '#5bc0de', + /** + * {string: hex} Color of the container holding the line, default is using the Fg color and lightening it + */ + colorArcBg: null, + + /** + * {String} Instead of providing a color or hex for the color, you can provide a class from the style + * sheet and specify what you would like to grab for the color in styleSrc + */ + styleArcFg: null, + styleArcBg: null, + styleSrc: 'color', + + /** + * {Boolean} If set to false, then the graph will not be animated + */ + animateEasing: true, + /** + * {Int} Speed for the animation, 1 is fastest, higher the number, slower the animation + */ + animateSpeed: 5, + /** + * {Int} Math used in animation speed + */ + animateNumerator: 12, + animateDivisor: 15, + + /** + * {double} Placeholder for current percentage while animating + */ + _animatePerc: 0.00, + + /** + * {Object} Placeholder for setInterval + */ + _animateLoop: null, + + /** + * {Object} Placeholder for canvas + */ + _canvas: null, + + /** + * {Object} Placeholder for canvas context + */ + _ctx: null, + + update: function (o) { + if (typeof o === 'object') { + var difference; + + // if using int, convert to percent to check difference + if (typeof o.arcFillInt !== 'undefined' && 
+        update: function (o) {
+            if (typeof o === 'object') {
+                var difference;
+
+                // if using int, convert to percent to check difference
+                if (typeof o.arcFillInt !== 'undefined' && o.arcFillInt == this.arcFillInt &&
+                    typeof o.arcFillTotal !== 'undefined' && o.arcFillTotal == this.arcFillTotal) {
+                    // nothing changed: keep the current percentage
+                    o.arcFillPercent = this.arcFillPercent;
+                } else if (typeof o.arcFillInt !== 'undefined' && typeof o.arcFillTotal !== 'undefined' &&
+                    (o.arcFillInt != this.arcFillInt || o.arcFillTotal != this.arcFillTotal)) {
+                    // either value changed: recompute the percentage
+                    o.arcFillPercent = (o.arcFillInt / o.arcFillTotal);
+                } else if (typeof o.arcFillInt !== 'undefined' && typeof o.arcFillTotal === 'undefined' &&
+                    (o.arcFillInt != this.arcFillInt)) {
+                    o.arcFillPercent = (o.arcFillInt / this.arcFillTotal);
+                }
+
+                if (typeof o.arcFillPercent !== 'undefined') {
+                    difference = Math.abs((this.arcFillPercent - o.arcFillPercent));
+                } else {
+                    difference = this.arcFillPercent;
+                }
+
+                this._extendOptions(o, true);
+
+                clearInterval(this._animateLoop);
+
+                if (difference > 0) {
+                    var that = this;
+                    this._animateLoop = setInterval(function () {
+                        return that._animate();
+                    }, (this.animateSpeed * this.animateNumerator) / (difference * this.animateDivisor));
+                }
+            }
+        },
+
+        _extendOptions: function (o, update) {
+            var color = false;
+            if (update)
+                color = this.colorArcFg;
+
+            $.extend(this, o, true);
+
+            if (typeof o.arcFillStart !== 'undefined' && typeof o.arcFillEnd !== 'undefined' && typeof o.arcFillTotal !== 'undefined') {
+                this.arcFillInt = (o.arcFillEnd - o.arcFillStart);
+            }
+
+            if (typeof o.arcFillPercent === 'undefined' && this.arcFillInt !== null && this.arcFillInt >= 0 && this.arcFillTotal !== null && this.arcFillTotal > 0) {
+                this.arcFillPercent = this.arcFillInt / this.arcFillTotal;
+            }
+
+            if (typeof o.elementId === 'undefined') {
+                this.elementId = 'fg-' + this.appendTo + '-canvas';
+            }
+            // supporting color if passed, changing to hex
+            if (typeof o.colorArcFg !== 'undefined') {
+                this.colorArcFg = colorToHex(o.colorArcFg);
+            }
+
+            if (typeof o.colorArcBg !== 'undefined') {
+                this.colorArcBg = colorToHex(o.colorArcBg);
+            }
+
+            // only use the styleArcFg if colorArcFg wasn't specified in the options
+            if (typeof o.styleArcFg !== 'undefined' && typeof o.colorArcFg === 'undefined') {
+                this.colorArcFg = getStyleRuleValue(this.styleSrc, this.styleArcFg);
+            }
+
+            if (typeof o.colorArcBg === 'undefined' && this.colorArcBg === null && this.colorArcFg !== null) {
+                this.colorArcBg = this.colorArcFg;
+            }
+
+            // re-derive the background track color whenever the foreground color changed
+            if (this.colorArcBg !== null && (!update || colorToHex(this.colorArcFg) != colorToHex(color))) {
+                if (colorToHex(this.colorArcFg) != colorToHex(color))
+                    this.colorArcBg = this.colorArcFg;
+
+                this.colorArcBg = shadeColor(this.colorArcBg, this.arcBgColorLight, this.arcBgColorSat);
+            }
+
+            if (typeof o.dialLabel === 'boolean' && o.dialLabel) {
+                this.dialLabel = 'FlexGauge';
+            }
+
+        },
+
+        _build: function () {
+            if (document.getElementById(this.elementId) === null) {
+                // create the canvas this gauge will draw on
+                $(this.appendTo).append('<canvas id="' + this.elementId + '" class="' + this.elementClass +
+                    '" width="' + this.elementWidth + '" height="' + this.elementHeight + '"></canvas>');
+            }
+
+            this._canvas = document.getElementById(this.elementId);
+            this._ctx = this._canvas.getContext("2d");
+
+            this.arcAngleStart = this.arcAngleStart * Math.PI;
+            this.arcAngleEnd = this.arcAngleEnd * Math.PI;
+            if (this.animateEasing === false) {
+                this._animatePerc = this.arcFillPercent;
+            }
+
+            var that = this;
+            this._animateLoop = setInterval(function () {
+                return that._animate();
+            }, (this.animateSpeed * this.animateNumerator) / (this.arcFillPercent * this.animateDivisor));
+        },
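+
+        /*
+         * Animation timing, worked through with the defaults above: the redraw
+         * interval is (animateSpeed * animateNumerator) / (fill * animateDivisor)
+         * milliseconds, where fill is arcFillPercent at build time and the fill
+         * difference on update(). With animateSpeed = 5, animateNumerator = 12,
+         * animateDivisor = 15 and a fill of 0.5, that is (5*12)/(0.5*15) = 8 ms
+         * per tick; _animate() below then moves one percentage point per tick.
+         */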
+        _animate: function () {
+            var animateInt = Math.round(this._animatePerc * 100);
+            var arcInt = Math.round(this.arcFillPercent * 100);
+
+            // step one percentage point per tick toward the target fill
+            if (animateInt < arcInt)
+                animateInt++;
+            else
+                animateInt--;
+
+            this._animatePerc = (animateInt / 100);
+            if (animateInt === arcInt) {
+                this.arcFillPercent = this._animatePerc;
+                clearInterval(this._animateLoop);
+            }
+            this._draw();
+        },
+
+        _draw: function () {
+            // Clear the canvas every time a chart is drawn
+            this._ctx.clearRect(0, 0, this.elementWidth, this.elementHeight);
+
+            // Background arc: the top half-circle that holds the needle
+            this._ctx.beginPath();
+            this._ctx.strokeStyle = this.colorArcBg;
+            this._ctx.lineWidth = this.arcStrokeBg;
+            this._ctx.arc(
+                this.elementWidth / 2,
+                this.elementHeight / 2 + 50,
+                this.arcSize,
+                0,
+                Math.PI,
+                true
+            );
+
+            this._ctx.stroke();
+
+            //var newEnd = ((this.arcAngleEnd - this.arcAngleStart) * this._animatePerc) + this.arcAngleStart;
+            var newStart;
+            var newEnd;
+
+            // the needle sweeps away from the top of the arc (-PI/2),
+            // to the right for positive inc and to the left for negative inc
+            var incArc = this.inc * Math.PI / 2;
+            if (this.inc >= 0.0) {
+                newStart = -Math.PI / 2;
+                newEnd = newStart + incArc;
+            } else {
+                newStart = -Math.PI / 2 + incArc;
+                newEnd = -Math.PI / 2;
+            }
+
+            var colorShadesTabRed = ['#ff0000', '#ff4000', '#ff8000', '#ff9900', '#ffbf00', '#ffff00'];
+            var colorShadesTabGreen = ['#ffff00', '#E0FF00', '#D0FF00', '#a0ff00', '#00ff00', '#00ff40'];
+            var colorValue = parseInt(Math.abs((this.inc / this.incTot) * 5));
+            var theColor;
+            if (this.inc >= 0.0)
+                theColor = colorShadesTabGreen[colorValue];
+            else
+                theColor = colorShadesTabRed[5 - colorValue];
+            this.colorArcFg = theColor;
+
+            this._ctx.beginPath();
+            this._ctx.strokeStyle = this.colorArcFg;
+            this._ctx.lineWidth = this.arcStrokeFg;
+            this._ctx.arc(
+                this.elementWidth / 2,
+                this.elementHeight / 2 + 50,
+                this.arcSize,
+                newStart,
+                newEnd,
+                false
+            );
+            this._ctx.stroke();
+            this._renderLabel();
+        },
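+
+        /*
+         * Color selection in _draw() above, worked example: the needle color
+         * is bucketed with colorValue = parseInt(|inc / incTot| * 5), so for
+         * inc = -0.5, incTot = 1.0 this gives 2 and colorShadesTabRed[5-2],
+         * i.e. '#ff9900'; inc = +1.0 gives 5 and colorShadesTabGreen[5],
+         * i.e. '#00ff40'. Note that |inc| > incTot would index past the end
+         * of the shade tables, so callers keep inc within -incTot..+incTot.
+         */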
+        _renderLabel: function () {
+            if (this.dialValue) {
+                var dialVal;
+                var dial = $(this.appendTo).find('div.' + this.dialClass);
+                if (dial.length === 0) {
+                    // create the div the dial value is rendered into
+                    $(this.appendTo).append('<div class="' + this.dialClass + '"></div>');
+                }
+                dial = $(this.appendTo).find('div.' + this.dialClass);
+                if (typeof this.dialValue === 'boolean') {
+                    switch (this.dialUnit) {
+                        case '%':
+                            dialVal = Math.round(this._animatePerc * 100);
+                            break;
+                        default:
+                            dialVal = Math.round(this.arcFillInt * (this._animatePerc / this.arcFillPercent));
+                            break;
+                    }
+                    dialVal = (isNaN(dialVal) ? 0 : dialVal);
+                    switch (this.dialUnitPosition) {
+                        case 'before':
+                            dialVal = this.dialUnit + dialVal;
+                            break;
+                        case 'after':
+                            dialVal = dialVal + this.dialUnit;
+                            break;
+                    }
+                } else {
+                    dialVal = this.dialValue;
+                }
+                dial.html(dialVal);
+            }
+            if (this.dialLabel) {
+                var label = $(this.appendTo).find('div.' + this.dialLabelClass);
+                if (label.length === 0) {
+                    // create the div the label is rendered into
+                    $(this.appendTo).append('<div class="' + this.dialLabelClass + '"></div>');
+                }
+                label = $(this.appendTo).find('div.' + this.dialLabelClass);
+                label.html(this.dialLabel);
+            }
+        }
+    };
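+
+    /*
+     * shadeColor(), worked example: each channel is lightened by amt (clamped
+     * to 0..255) and then blended, sat% channel against (100-sat)% gray.
+     * shadeColor('#5bc0de', 80, 60) lightens (91, 192, 222) to (171, 255, 255),
+     * takes gray = 171*0.3086 + 255*0.6094 + 255*0.0820 (about 229), and blends
+     * 60/40 to roughly rgb(194, 245, 245): the pale track behind the gauge.
+     */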
+    function shadeColor(col, amt, sat) {
+        if (col[0] == "#") {
+            col = col.slice(1);
+        }
+
+        var num = parseInt(col, 16);
+
+        var r = (num >> 16) + amt;
+
+        if (r > 255) r = 255;
+        else if (r < 0) r = 0;
+
+        var g = ((num >> 8) & 0x00FF) + amt;
+
+        if (g > 255) g = 255;
+        else if (g < 0) g = 0;
+
+        var b = (num & 0x0000FF) + amt;
+
+        if (b > 255) b = 255;
+        else if (b < 0) b = 0;
+
+        // standard grayscale weights (R 0.3086, G 0.6094, B 0.0820)
+        var gray = r * 0.3086 + g * 0.6094 + b * 0.0820;
+        sat = (sat / 100);
+
+        r = Math.round(r * sat + gray * (1 - sat));
+        g = Math.round(g * sat + gray * (1 - sat));
+        b = Math.round(b * sat + gray * (1 - sat));
+        return "#" + (b | (g << 8) | (r << 16)).toString(16);
+    }
+
+    function getStyleRuleValue(style, selector) {
+        // render a throwaway element carrying the class so its computed style can be read
+        $('body').append('<div id="getStyleRuleValue-' + selector + '"></div>');
+        var element = $('#getStyleRuleValue-' + selector);
+        element.addClass(selector);
+        var color = element.css(style);
+        var hex = colorToHex(color);
+        element.remove();
+        return hex;
+    }
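+
+    /*
+     * colorToHex() below undoes the browser's computed-style formatting:
+     * jQuery's .css('color') typically returns 'rgb(r, g, b)', so for example
+     * colorToHex('rgb(91, 192, 222)') yields '#5bc0de', while a string that
+     * is already hex is returned unchanged.
+     */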
+    function colorToHex(color) {
+        if (color[0] != 'r')
+            return color;
+
+        var rgb = color.match(/^rgb\((\d+),\s*(\d+),\s*(\d+)\)$/);
+        return "#" +
+            ("0" + parseInt(rgb[1], 10).toString(16)).slice(-2) +
+            ("0" + parseInt(rgb[2], 10).toString(16)).slice(-2) +
+            ("0" + parseInt(rgb[3], 10).toString(16)).slice(-2);
+    }
+
+    if (typeof define === 'function') {
+        define('flex-gauge', ['jquery'], function ($) {
+            return FlexGauge;
+        });
+    } else {
+        window.FlexGauge = FlexGauge;
+    }
+})(jQuery);
diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js
index 69ffd32e..f0cd101f 100644
--- a/var/www/static/js/indexjavascript.js
+++ b/var/www/static/js/indexjavascript.js
@@ -309,7 +309,13 @@
     var tmp_values2 = [];
     refresh();
     update_values();
-    create_queue_table();
+
+    if($('#button-toggle-queues').prop('checked')){
+        create_queue_table();
+    }
+    else{
+        $("#queueing").html('');
+    }
 
     for (i = 0; i < (glob_tabvar.row1).length; i++){
diff --git a/var/www/static/js/sentiment_plot.js b/var/www/static/js/sentiment_plot.js
new file mode 100644
index 00000000..b8541247
--- /dev/null
+++ b/var/www/static/js/sentiment_plot.js
@@ -0,0 +1,94 @@
+var li_text = "
  • " + + +/* Get Providers List and display them by row */ +$.getJSON('/sentiment_analysis_plot_tool_getdata/?getProviders=True', function(data){ + for(i=0; i=0; i--){ + for(j=0; j<24; j++){ + var t1 =now.getDate()-i + ":"; + var t2 =now.getHours()-(23-j); + t2 = t2 < 0 ? 24+t2 : t2; + t2 += "h"; + to_ret[j+24*(day-i)] = t1+t2; + } + } + return to_ret; + }; + + var offset_to_time = generate_offset_to_time(23); + var offset_to_date = generate_offset_to_date(7); + + var sparklineOptions = { + height: 80,//Height of the chart - Defaults to 'auto' (line height of the containing tag) + + chartRangeMin: -1, + chartRangeMax: 1, + + type: 'bar', + barSpacing: 0, + barWidth: 2, + barColor: '#00bf5f', + negBarColor: '#f22929', + zeroColor: '#ffff00', + + tooltipFormat: ' {{offset:names}}, {{value}} ', +}; + + +$.getJSON("/sentiment_analysis_getplotdata/", + function(data) { + var all_data = []; + var plot_data = []; + var graph_avg = []; + var array_provider = Object.keys(data); + var dates_providers = Object.keys(data[array_provider[0]]); + var dateStart = parseInt(dates_providers[0]); + var oneHour = 60*60; + var oneWeek = oneHour*24*7; + + var all_graph_day_sum = 0.0; + var all_graph_hour_sum = 0.0; + var all_graph_hour_maxVal = 0.0; + var all_day_avg = 0.0; + var all_day_avg_maxVal = 0.0; + + for (graphNum=0; graphNum<8; graphNum++) { + var max_value = 0.0; + var max_value_day = 0.0; + var graph_data = []; + var spark_data = []; + var curr_provider = array_provider[graphNum]; + var curr_sum = 0.0; + var curr_sum_elem = 0.0; + var day_sum = 0.0; + var day_sum_elem = 0.0; + var hour_sum = 0.0; + + for(curr_date=dateStart+oneHour; curr_date<=dateStart+oneWeek; curr_date+=oneHour){ + var data_array = data[curr_provider][curr_date]; + + if (data_array.length == 0){ + graph_data.push({'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}); + spark_data.push(0); + } else { //compute avg for a given date for a given graph + var compPosAvg = 0; + var compNegAvg = 0; + var pos = 0; + var neg = 0; + var neu = 0; + + for(i=0; i max_value ? Math.abs(pos-neg) : max_value; + + if(curr_date >= dateStart+oneWeek-23*oneHour){ + max_value_day = Math.abs(pos-neg) > max_value_day ? Math.abs(pos-neg) : max_value_day; + day_sum += (pos-neg); + day_sum_elem++; + } + if(curr_date > dateStart+oneWeek-2*oneHour && curr_date <=dateStart+oneWeek-oneHour){ + hour_sum += (pos-neg); + } + + } + } + all_graph_day_sum += day_sum; + all_graph_hour_sum += hour_sum; + all_graph_hour_maxVal = Math.abs(hour_sum) > all_graph_hour_maxVal ? Math.abs(hour_sum) : all_graph_hour_maxVal; + + var curr_avg = curr_sum / (curr_sum_elem); + if(isNaN(curr_avg)) + curr_avg = 0.0 + //var curr_avg = curr_sum / (oneWeek/oneHour); + //var curr_avg = curr_sum / (spark_data.length); + graph_avg.push([curr_provider, curr_avg]); + plot_data.push(spark_data); + all_data.push(graph_data); + + + sparklineOptions.chartRangeMax = max_value; + sparklineOptions.chartRangeMin = -max_value; + sparklineOptions.tooltipValueLookups = { names: offset_to_date}; + + // print week + var num = graphNum + 1; + var placeholder = '.sparkLineStatsWeek' + num; + sparklineOptions.barWidth = 2; + $(placeholder).sparkline(plot_data[graphNum], sparklineOptions); + $(placeholder+'t').text(curr_provider); + var curr_avg_text = isNaN(curr_avg) ? 
"No data" : curr_avg.toFixed(5); + $(placeholder+'s').text(curr_avg_text); + + sparklineOptions.barWidth = 18; + sparklineOptions.tooltipFormat = ' Avg: {{value}} ' + $(placeholder+'b').sparkline([curr_avg], sparklineOptions); + sparklineOptions.tooltipFormat = ' {{offset:names}}, {{value}} ' + + sparklineOptions.tooltipValueLookups = { names: offset_to_time}; + sparklineOptions.chartRangeMax = max_value_day; + sparklineOptions.chartRangeMin = -max_value_day; + + var avgName = ".pannelWeek" + num; + if (curr_avg > 0) { + $(avgName).addClass("panel-success") + } else if(curr_avg < 0) { + $(avgName).addClass("panel-danger") + } else if(isNaN(curr_avg)) { + $(avgName).addClass("panel-info") + } else { + $(avgName).addClass("panel-warning") + } + + + + // print today + var data_length = plot_data[graphNum].length; + var data_today = plot_data[graphNum].slice(data_length-24, data_length); + + placeholder = '.sparkLineStatsToday' + num; + sparklineOptions.barWidth = 14; + $(placeholder).sparkline(data_today, sparklineOptions); + $(placeholder+'t').text(curr_provider); + + sparklineOptions.barWidth = 18; + sparklineOptions.tooltipFormat = ' Avg: {{value}} ' + //var day_avg = day_sum/24; + var day_avg = isNaN(day_sum/day_sum_elem) ? 0 : day_sum/day_sum_elem; + var day_avg_text = isNaN(day_sum/day_sum_elem) ? 'No data' : (day_avg).toFixed(5); + all_day_avg += day_avg; + all_day_avg_maxVal = Math.abs(day_avg) > all_day_avg_maxVal ? Math.abs(day_avg) : all_day_avg_maxVal; + $(placeholder+'b').sparkline([day_avg], sparklineOptions); + sparklineOptions.tooltipFormat = ' {{offset:names}}, {{value}} ' + $(placeholder+'s').text(day_avg_text); + + avgName = ".pannelToday" + num; + if (day_avg > 0) { + $(avgName).addClass("panel-success") + } else if(day_avg < 0) { + $(avgName).addClass("panel-danger") + } else if(isNaN(day_sum/day_sum_elem)) { + $(avgName).addClass("panel-info") + } else { + $(avgName).addClass("panel-warning") + } + + }//for loop + + + + /* ---------------- Gauge ---------------- */ + var gaugeOptions = { + animateEasing: true, + + elementWidth: 200, + elementHeight: 125, + + arcFillStart: 10, + arcFillEnd: 12, + arcFillTotal: 20, + incTot: 1.0, + + arcBgColorLight: 200, + arcBgColorSat: 0, + arcStrokeFg: 20, + arcStrokeBg: 30, + + colorArcFg: '#FF3300', + animateSpeed: 1, + + }; + // Clone object + var gaugeOptions2 = jQuery.extend(true, {}, gaugeOptions); + var gaugeOptions3 = jQuery.extend(true, {}, gaugeOptions); + + + + gaugeOptions.appendTo = '#gauge_today_last_hour'; + gaugeOptions.dialLabel = 'Last hour'; + gaugeOptions.elementId = 'gauge1'; + var piePercent = (all_graph_hour_sum / 8) / all_graph_hour_maxVal; + gaugeOptions.inc = piePercent; + var gauge_today_last_hour = new FlexGauge(gaugeOptions); + + gaugeOptions2.appendTo = '#gauge_today_last_days'; + gaugeOptions2.dialLabel = 'Today'; + gaugeOptions2.elementId = 'gauge2'; + //piePercent = (all_graph_day_sum / (8*24)) / max_value; + piePercent = (all_day_avg / 8) / all_day_avg_maxVal; + gaugeOptions2.inc = piePercent; + var gauge_today_last_days = new FlexGauge(gaugeOptions2); + + gaugeOptions3.appendTo = '#gauge_week'; + gaugeOptions3.dialLabel = 'Week'; + gaugeOptions3.elementId = 'gauge3'; + + var graph_avg_sum = 0.0; + var temp_max_val = 0.0; + for (i=0; i temp_max_val ? 
Math.abs(graph_avg[i][1]) : temp_max_val; + } + + piePercent = (graph_avg_sum / graph_avg.length) / temp_max_val; + gaugeOptions3.inc = piePercent; + var gauge_today_last_days = new FlexGauge(gaugeOptions3); + + + /* --------- Sort providers -------- */ + + graph_avg.sort(function(a, b){return b[1]-a[1]}); + + for (i=1; i<6; i++){ + $('.worst'+i).text(graph_avg[7-(i-1)][0]); + $('.best'+i).text(graph_avg[i-1][0]); + } + + /* ----------- CanvasJS ------------ */ + + var comp_sum_day_pos = 0.0; + var comp_sum_day_neg = 0.0; + var comp_sum_hour_pos = 0.0; + var comp_sum_hour_neg = 0.0; + for(graphNum=0; graphNum<8; graphNum++){ + curr_graphData = all_data[graphNum]; + var gauge_data = curr_graphData.slice(curr_graphData.length-24, curr_graphData.length); + for (i=1; i< gauge_data.length; i++){ + comp_sum_day_pos += gauge_data[i].compoundPos; + comp_sum_day_neg += gauge_data[i].compoundNeg; + + if(i == 23){ + comp_sum_hour_pos += gauge_data[i].compoundPos; + comp_sum_hour_neg += gauge_data[i].compoundNeg; + } + } + + } + + var options_canvasJS_1 = { + + animationEnabled: true, + axisY: { + tickThickness: 0, + lineThickness: 0, + valueFormatString: " ", + gridThickness: 0 + }, + axisX: { + tickThickness: 0, + lineThickness: 0, + labelFontSize: 0.1, + }, + data: [ + { + toolTipContent: "Positive: {y}", + type: "bar", + color: "green", + dataPoints: [ + {y: comp_sum_hour_pos/8} + ] + }, + { + toolTipContent: "Negative: {y}", + type: "bar", + color: "red", + dataPoints: [ + {y: comp_sum_hour_neg/8} + ] + } + ] + }; + + var chart_canvas1 = new CanvasJS.Chart("bar_today_last_hour", options_canvasJS_1); + + var options_canvasJS_2 = { + + animationEnabled: true, + axisY: { + tickThickness: 0, + lineThickness: 0, + valueFormatString: " ", + gridThickness: 0 + }, + axisX: { + tickThickness: 0, + lineThickness: 0, + labelFontSize: 0.1, + }, + data: [ + { + toolTipContent: "Positive: {y}", + type: "bar", + color: "green", + dataPoints: [ + {y: comp_sum_day_pos/8} + ] + }, + { + toolTipContent: "Negative: {y}", + type: "bar", + color: "red", + dataPoints: [ + {y: comp_sum_day_neg/8} + ] + } + ] + }; + + var chart_canvas2 = new CanvasJS.Chart("bar_today_last_days", options_canvasJS_2); + + chart_canvas1.render(); + chart_canvas2.render(); + + + + } +); + + + + + + + + + + + + + + diff --git a/var/www/templates/Moduletrending.html b/var/www/templates/Moduletrending.html index 0e51f95e..a2635868 100644 --- a/var/www/templates/Moduletrending.html +++ b/var/www/templates/Moduletrending.html @@ -24,14 +24,7 @@