diff --git a/bin/SentimentAnalyser.py b/bin/SentimentAnalyser.py
index eb89ddc3..8b48610f 100755
--- a/bin/SentimentAnalyser.py
+++ b/bin/SentimentAnalyser.py
@@ -15,6 +15,7 @@ import time
 import datetime
 import calendar
 import redis
+import json
 
 from pubsublogger import publisher
 from Helper import Process
 from packages import Paste
@@ -22,6 +23,8 @@ from packages import Paste
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
 from nltk import tokenize
 
+# Config Variables
+accepted_Mime_type = ['text/plain']
 
 def Analyse(message, server):
     #print 'analyzing'
@@ -31,68 +34,84 @@ def Analyse(message, server):
     content = paste.get_p_content()
     provider = paste.p_source
     p_date = str(paste._get_p_date())
-    #print provider, date
+    p_MimeType = paste._get_p_encoding()
 
-    the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
-    #print 'pastedate: ', the_date
-    the_time = datetime.datetime.now()
-    the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
-    #print 'now: ', the_time
-    combined_datetime = datetime.datetime.combine(the_date, the_time)
-    #print 'combined: ', combined_datetime
-    timestamp = calendar.timegm(combined_datetime.timetuple())
-    #print 'timestamp: ', timestamp
+    # Perform further analysis
+    if p_MimeType == "text/plain":
+        if isJSON(content):
+            p_MimeType = "JSON"
 
-    sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))
-    #print len(sentences)
-
-    avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
-    neg_line = 0
-    pos_line = 0
-    sid = SentimentIntensityAnalyzer()
-    for sentence in sentences:
-        ss = sid.polarity_scores(sentence)
-        for k in sorted(ss):
-            if k == 'compound':
-                if ss['neg'] > ss['pos']:
-                    avg_score['compoundNeg'] += ss[k]
-                    neg_line += 1
+    if p_MimeType in accepted_Mime_type:
+        print 'Processing', path
+        the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
+        #print 'pastedate: ', the_date
+        the_time = datetime.datetime.now()
+        the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
+        #print 'now: ', the_time
+        combined_datetime = datetime.datetime.combine(the_date, the_time)
+        #print 'combined: ', combined_datetime
+        timestamp = calendar.timegm(combined_datetime.timetuple())
+        #print 'timestamp: ', timestamp
+
+        sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))
+        #print len(sentences)
+
+        avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
+        neg_line = 0
+        pos_line = 0
+        sid = SentimentIntensityAnalyzer()
+        for sentence in sentences:
+            ss = sid.polarity_scores(sentence)
+            for k in sorted(ss):
+                if k == 'compound':
+                    if ss['neg'] > ss['pos']:
+                        avg_score['compoundNeg'] += ss[k]
+                        neg_line += 1
+                    else:
+                        avg_score['compoundPos'] += ss[k]
+                        pos_line += 1
                 else:
-                    avg_score['compoundPos'] += ss[k]
-                    pos_line += 1
-            else:
-                avg_score[k] += ss[k]
+                    avg_score[k] += ss[k]
+
+            #print('{0}: {1}, '.format(k, ss[k]))
+
+        for k in avg_score:
+            if k == 'compoundPos':
+                avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
+            elif k == 'compoundNeg':
+                avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
+            else:
+                avg_score[k] = avg_score[k] / len(sentences)
+
+
+        # In redis-levelDB: {} = set, () = K-V
+        # {Provider_set -> provider_i}
+        # {Provider_TimestampInHour_i -> UniqID_i}_j
+        # (UniqID_i -> PasteValue_i)
+
+        server.sadd('Provider_set', provider)
+        #print 'Provider_set', provider
+
+        provider_timestamp = provider + '_' + str(timestamp)
+        #print provider_timestamp
+        server.incr('UniqID')
+        UniqID = server.get('UniqID')
+        print provider_timestamp, '->', UniqID
+        server.sadd(provider_timestamp, UniqID)
+        server.set(UniqID, avg_score)
+        print avg_score
+        #print UniqID, '->', avg_score
+    else:
+        print 'Dropped:', p_MimeType
 
-            #print('{0}: {1}, '.format(k, ss[k]))
+
+def isJSON(content):
+    try:
+        json.loads(content)
+        return True
 
-    for k in avg_score:
-        if k == 'compoundPos':
-            avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
-        elif k == 'compoundNeg':
-            avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
-        else:
-            avg_score[k] = avg_score[k] / len(sentences)
-
-
-    # In redis-levelDB: {} = set, () = K-V
-    # {Provider_set -> provider_i}
-    # {Provider_TimestampInHour_i -> UniqID_i}_j
-    # (UniqID_i -> PasteValue_i)
-
-    server.sadd('Provider_set', provider)
-    #print 'Provider_set', provider
-
-    provider_timestamp = provider + '_' + str(timestamp)
-    #print provider_timestamp
-    server.incr('UniqID')
-    UniqID = server.get('UniqID')
-    print provider_timestamp, '->', UniqID
-    server.sadd(provider_timestamp, UniqID)
-    server.set(UniqID, avg_score)
-    #print UniqID, '->', avg_score
-
-    #print '(', provider, timestamp, str(avg_score) , ')'
-    #server.hset(provider, timestamp, str(avg_score))
+    except Exception,e:
+        return False
 
 if __name__ == '__main__':
     # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@@ -102,7 +121,7 @@ if __name__ == '__main__':
     publisher.channel = 'Script'
 
     # Section name in bin/packages/modules.cfg
-    config_section = 'SentimentAnalyser'
+    config_section = 'SentimentAnalysis'
 
     # Setup the I/O queues
    p = Process(config_section)
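
Note: the schema comments in the hunk above ({} = set, () = K-V) describe the new
storage layout completely. As a usage sketch, the snippet below walks that layout to
read scores back. The connection parameters are placeholder assumptions, not values
from this patch; scores come back as the string form of the avg_score dict, because
Analyse() stores the dict with a plain server.set().

    import ast
    import calendar
    import datetime
    import redis

    # Placeholder connection settings (assumption) -- point this at the
    # redis/levelDB instance the sentiment module writes to.
    r = redis.StrictRedis(host='localhost', port=6380, db=4)

    # Rebuild the hour-aligned bucket timestamp the same way Analyse() does.
    now = datetime.datetime.now()
    combined = datetime.datetime.combine(now.date(), datetime.time(now.hour, 0, 0))
    timestamp = calendar.timegm(combined.timetuple())

    for provider in r.smembers('Provider_set'):
        bucket = provider + '_' + str(timestamp)   # {Provider_TimestampInHour -> UniqID}
        for uniq_id in r.smembers(bucket):
            raw = r.get(uniq_id)                   # (UniqID -> PasteValue), a dict repr()
            if raw is not None:
                print provider, ast.literal_eval(raw)
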
diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg
index 57a85e73..0f3ceb38 100644
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@@ -66,8 +66,8 @@ subscribe = Redis_BrowseWarningPaste
 #subscribe = Redis_Cve
 #publish = Redis_BrowseWarningPaste
 
-[SentimentAnalyser]
-subscribe = Redis_LinesLong
+[SentimentAnalysis]
+subscribe = Redis_Global
 
 [Release]
 subscribe = Redis_Global
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index 1ff8b097..af846f7a 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -497,7 +497,7 @@ def sentiment_analysis_plot_tool_getdata():
     timestamp1 = calendar.timegm(date1.timetuple())
     timestamp2 = calendar.timegm(date2.timetuple())
 
-    print timestamp2
+
     oneHour = 60*60
     oneDay = oneHour*24
 
diff --git a/var/www/static/js/sentiment_trending.js b/var/www/static/js/sentiment_trending.js
index f0c9e9c6..ca0eafdd 100644
--- a/var/www/static/js/sentiment_trending.js
+++ b/var/www/static/js/sentiment_trending.js
@@ -1,4 +1,26 @@
+	function generate_offset_to_time(num){
+		var to_ret = {};
+		for(i=0; i<=num; i++)
+			to_ret[i] = new Date().getHours()-(23-i)+'h';
+		return to_ret;
+	};
+
+	function generate_offset_to_date(day){
+		var now = new Date();
+		var to_ret = {};
+		for(i=0; i<day; i++){
+			for(j=0; j<24; j++){
+				var t1 = now.getDate()-i + ':';
+				var t2 = now.getHours()-(23-j)+'h';
+				to_ret[j+24*i] = t1+t2;
+			}
+		}
+		return to_ret;
+	};
+
+	var offset_to_time = generate_offset_to_time(23);
+	var offset_to_date = generate_offset_to_date(7);
 
 var sparklineOptions = {
 	tooltipFormat: '<span style="color: {{color}}">&#9679;</span> {{offset:names}}, {{value}}</span>',
 };
@@ -37,7 +60,9 @@ $.getJSON("/sentiment_analysis_getplotdata/",
 			var spark_data = [];
 			var curr_provider = array_provider[graphNum];
 			var curr_sum = 0.0;
+			var curr_sum_elem = 0.0;
 			var day_sum = 0.0;
+			var day_sum_elem = 0.0;
 			var hour_sum = 0.0;
 			for(curr_date=dateStart; curr_date<dateStart+oneWeek; curr_date+=oneHour){
 				curr_sum += (pos-neg);
+				curr_sum_elem++;
 				max_value = Math.abs(pos-neg) > max_value ? Math.abs(pos-neg) : max_value;
 
 				if(curr_date >= dateStart+oneWeek-24*oneHour){
 					day_sum += (pos-neg);
+					day_sum_elem++;
 				}
 				if(curr_date >= dateStart+oneWeek-oneHour){
 					hour_sum += (pos-neg);
@@ -85,7 +112,8 @@
 			all_graph_day_sum += day_sum;
 			all_graph_hour_sum += hour_sum;
 
-			var curr_avg = curr_sum / (oneWeek/oneHour);
+			var curr_avg = curr_sum / (curr_sum_elem);
+			//var curr_avg = curr_sum / (oneWeek/oneHour);
 			//var curr_avg = curr_sum / (spark_data.length);
 			graph_avg.push([curr_provider, curr_avg]);
 			plot_data.push(spark_data);
@@ -94,6 +122,8 @@
 			sparklineOptions.chartRangeMax = max_value;
 			sparklineOptions.chartRangeMin = -max_value;
 
+			sparklineOptions.tooltipValueLookups = { names: offset_to_date};
+
 			// print week
 			var num = graphNum + 1;
 			var placeholder = '.sparkLineStatsWeek' + num;
@@ -102,12 +132,15 @@
 			$(placeholder+'s').text(curr_avg.toFixed(5));
 
 			sparklineOptions.barWidth = 18;
+			sparklineOptions.tooltipFormat = ' Avg: {{value}} '
 			$(placeholder+'b').sparkline([curr_avg], sparklineOptions);
+			sparklineOptions.tooltipFormat = ' {{offset:names}}, {{value}} '
 			sparklineOptions.barWidth = 2;
+			sparklineOptions.tooltipValueLookups = { names: offset_to_time};
 
 			// print today
 			var data_length = plot_data[graphNum].length;
-			var data_today = plot_data[graphNum].slice(data_length-24, data_length-1);
+			var data_today = plot_data[graphNum].slice(data_length-24, data_length);
 
 			placeholder = '.sparkLineStatsToday' + num;
 			sparklineOptions.barWidth = 14;
@@ -115,9 +148,13 @@
 			$(placeholder+'t').text(curr_provider);
 
 			sparklineOptions.barWidth = 18;
-			$(placeholder+'b').sparkline([day_sum/24], sparklineOptions);
+			sparklineOptions.tooltipFormat = ' Avg: {{value}} '
+			//var day_avg = day_sum/24;
+			var day_avg = day_sum/day_sum_elem;
+			$(placeholder+'b').sparkline([day_avg], sparklineOptions);
+			sparklineOptions.tooltipFormat = ' {{offset:names}}, {{value}} '
 			sparklineOptions.barWidth = 2;
-			$(placeholder+'s').text((day_sum/24).toFixed(5));
+			$(placeholder+'s').text((day_avg).toFixed(5));
 
 		}//for loop
@@ -153,13 +190,15 @@
 		gaugeOptions.appendTo = '#gauge_today_last_hour';
 		gaugeOptions.dialLabel = 'Last hour';
 		gaugeOptions.elementId = 'gauge1';
-		gaugeOptions.inc = all_graph_hour_sum / 8;
+		var piePercent = (all_graph_hour_sum / 8) / max_value;
+		gaugeOptions.inc = piePercent;
 		var gauge_today_last_hour = new FlexGauge(gaugeOptions);
 
 		gaugeOptions2.appendTo = '#gauge_today_last_days';
 		gaugeOptions2.dialLabel = 'Today';
 		gaugeOptions2.elementId = 'gauge2';
-		gaugeOptions2.inc = all_graph_day_sum / 8;
+		piePercent = (all_graph_day_sum / (8*24)) / max_value;
+		gaugeOptions2.inc = piePercent;
 		var gauge_today_last_days = new FlexGauge(gaugeOptions2);
 
 		gaugeOptions3.appendTo = '#gauge_week';
@@ -167,10 +206,14 @@
 		gaugeOptions3.dialLabel = 'Week';
 		gaugeOptions3.elementId = 'gauge3';
 
 		var graph_avg_sum = 0.0;
-		for (i=0; i<graph_avg.length; i++){
-			graph_avg_sum += graph_avg[i][1];
-		}
+		var temp_max_val = 0.0;
+		for (i=0; i<graph_avg.length; i++){
+			graph_avg_sum += graph_avg[i][1];
+			temp_max_val = Math.abs(graph_avg[i][1]) > temp_max_val ? Math.abs(graph_avg[i][1]) : temp_max_val;
+		}
 
-		gaugeOptions3.inc = graph_avg_sum / graph_avg.length;
+		piePercent = (graph_avg_sum / graph_avg.length) / temp_max_val;
+		gaugeOptions3.inc = piePercent;
 		var gauge_today_last_days = new FlexGauge(gaugeOptions3);
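
Note: the three gauge changes above share one idea: FlexGauge's inc now receives a
normalised fraction instead of a raw compound sum. A worked example of the same
arithmetic, with invented numbers (8 is the provider count used throughout the file):

    # Illustrative values only.
    all_graph_hour_sum = 2.4   # summed (pos-neg) over the last hour, all providers
    max_value = 0.6            # largest |pos-neg| observed during the week

    # Average per provider, then scale by the observed maximum so the dial
    # reads as a fraction in [-1, 1] rather than an unbounded sum.
    piePercent = (all_graph_hour_sum / 8) / max_value
    print piePercent           # 0.5
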
@@ -185,21 +228,24 @@
 
 		/* ----------- CanvasJS ------------ */
 
-		var gauge_data = graph_data.slice(graph_data.length-24*2, graph_data.length-24*1);
 		var comp_sum_day_pos = 0.0;
 		var comp_sum_day_neg = 0.0;
 		var comp_sum_hour_pos = 0.0;
 		var comp_sum_hour_neg = 0.0;
-		for (i=1; i< gauge_data.length; i++){
-			comp_sum_day_pos += gauge_data[i].compoundPos;
-			comp_sum_day_neg += gauge_data[i].compoundNeg;
+		for(graphNum=0; graphNum<8; graphNum++){
+			curr_graphData = all_data[graphNum];
+			var gauge_data = curr_graphData.slice(curr_graphData.length-24, curr_graphData.length);
+			for (i=1; i< gauge_data.length; i++){
+				comp_sum_day_pos += gauge_data[i].compoundPos;
+				comp_sum_day_neg += gauge_data[i].compoundNeg;
 
-			if(i >= 24){
-				comp_sum_hour_pos += gauge_data[i].compoundPos;
-				comp_sum_hour_neg += gauge_data[i].compoundNeg;
+				if(i == 23){
+					comp_sum_hour_pos += gauge_data[i].compoundPos;
+					comp_sum_hour_neg += gauge_data[i].compoundNeg;
+				}
 			}
-		}
+		}
 
 		var options_canvasJS_1 = {
@@ -216,20 +262,20 @@
 			labelFontSize: 0.1,
 		},
 		data: [
-		{
-			type: "bar",
-			color: "green",
-			dataPoints: [
-				{y: comp_sum_hour_pos/8}
-			]
-		},
-		{
-			type: "bar",
-			color: "red",
-			dataPoints: [
-				{y: comp_sum_hour_neg/8}
-			]
-		}
+			{
+				type: "bar",
+				color: "green",
+				dataPoints: [
+					{y: comp_sum_hour_pos/8}
+				]
+			},
+			{
+				type: "bar",
+				color: "red",
+				dataPoints: [
+					{y: comp_sum_hour_neg/8}
+				]
+			}
 		]
 	};
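
Note: the MIME gate added to Analyse() is easy to sanity-check in isolation. The
helper below reproduces its decision logic; should_analyse() is an illustrative
name for this sketch, not a function from the patch:

    import json

    accepted_Mime_type = ['text/plain']

    def isJSON(content):
        # Same test as the patch: anything json.loads() accepts counts as JSON.
        try:
            json.loads(content)
            return True
        except Exception:
            return False

    def should_analyse(content, mime_type):
        # Mirrors Analyse(): JSON pastes are re-labelled, then only MIME
        # types listed in accepted_Mime_type are processed.
        if mime_type == 'text/plain' and isJSON(content):
            mime_type = 'JSON'
        return mime_type in accepted_Mime_type

    assert should_analyse('an ordinary sentence.', 'text/plain')
    assert not should_analyse('{"key": 42}', 'text/plain')
    assert not should_analyse('GIF89a...', 'image/gif')
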