mirror of https://github.com/CIRCL/AIL-framework
Added draft of filter in sentiment analysis (discards syntactic languages) + added nice tooltip for sparkline. Trending now displays the average based on the number of elements processed rather than over the complete week + fixed bug in gauge and CanvasJS (the average was computed from only 1 graph instead of all 8).
parent
ecd834ffb6
commit
1084e45f1b
|
@ -15,6 +15,7 @@ import time
|
|||
import datetime
|
||||
import calendar
|
||||
import redis
|
||||
import json
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
from packages import Paste
|
||||
|
@ -22,6 +23,8 @@ from packages import Paste
|
|||
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
||||
from nltk import tokenize
|
||||
|
||||
# Config Variables
|
||||
accepted_Mime_type = ['text/plain']
|
||||
|
||||
def Analyse(message, server):
|
||||
#print 'analyzing'
|
||||
|
@ -31,68 +34,84 @@ def Analyse(message, server):
|
|||
content = paste.get_p_content()
|
||||
provider = paste.p_source
|
||||
p_date = str(paste._get_p_date())
|
||||
#print provider, date
|
||||
p_MimeType = paste._get_p_encoding()
|
||||
|
||||
the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
|
||||
#print 'pastedate: ', the_date
|
||||
the_time = datetime.datetime.now()
|
||||
the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
|
||||
#print 'now: ', the_time
|
||||
combined_datetime = datetime.datetime.combine(the_date, the_time)
|
||||
#print 'combined: ', combined_datetime
|
||||
timestamp = calendar.timegm(combined_datetime.timetuple())
|
||||
#print 'timestamp: ', timestamp
|
||||
# Perform further analysis
|
||||
if p_MimeType == "text/plain":
|
||||
if isJSON(content):
|
||||
p_MimeType = "JSON"
|
||||
|
||||
sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))
|
||||
#print len(sentences)
|
||||
|
||||
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
||||
neg_line = 0
|
||||
pos_line = 0
|
||||
sid = SentimentIntensityAnalyzer()
|
||||
for sentence in sentences:
|
||||
ss = sid.polarity_scores(sentence)
|
||||
for k in sorted(ss):
|
||||
if k == 'compound':
|
||||
if ss['neg'] > ss['pos']:
|
||||
avg_score['compoundNeg'] += ss[k]
|
||||
neg_line += 1
|
||||
if p_MimeType in accepted_Mime_type:
|
||||
print 'Processing', path
|
||||
the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8]))
|
||||
#print 'pastedate: ', the_date
|
||||
the_time = datetime.datetime.now()
|
||||
the_time = datetime.time(getattr(the_time, 'hour'), 0, 0)
|
||||
#print 'now: ', the_time
|
||||
combined_datetime = datetime.datetime.combine(the_date, the_time)
|
||||
#print 'combined: ', combined_datetime
|
||||
timestamp = calendar.timegm(combined_datetime.timetuple())
|
||||
#print 'timestamp: ', timestamp
|
||||
|
||||
sentences = tokenize.sent_tokenize(content.decode('utf-8', 'ignore'))
|
||||
#print len(sentences)
|
||||
|
||||
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
||||
neg_line = 0
|
||||
pos_line = 0
|
||||
sid = SentimentIntensityAnalyzer()
|
||||
for sentence in sentences:
|
||||
ss = sid.polarity_scores(sentence)
|
||||
for k in sorted(ss):
|
||||
if k == 'compound':
|
||||
if ss['neg'] > ss['pos']:
|
||||
avg_score['compoundNeg'] += ss[k]
|
||||
neg_line += 1
|
||||
else:
|
||||
avg_score['compoundPos'] += ss[k]
|
||||
pos_line += 1
|
||||
else:
|
||||
avg_score['compoundPos'] += ss[k]
|
||||
pos_line += 1
|
||||
else:
|
||||
avg_score[k] += ss[k]
|
||||
avg_score[k] += ss[k]
|
||||
|
||||
#print('{0}: {1}, '.format(k, ss[k]))
|
||||
|
||||
for k in avg_score:
|
||||
if k == 'compoundPos':
|
||||
avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
|
||||
elif k == 'compoundNeg':
|
||||
avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
|
||||
else:
|
||||
avg_score[k] = avg_score[k] / len(sentences)
|
||||
|
||||
|
||||
# In redis-levelDB: {} = set, () = K-V
|
||||
# {Provider_set -> provider_i}
|
||||
# {Provider_TimestampInHour_i -> UniqID_i}_j
|
||||
# (UniqID_i -> PasteValue_i)
|
||||
|
||||
server.sadd('Provider_set', provider)
|
||||
#print 'Provider_set', provider
|
||||
|
||||
provider_timestamp = provider + '_' + str(timestamp)
|
||||
#print provider_timestamp
|
||||
server.incr('UniqID')
|
||||
UniqID = server.get('UniqID')
|
||||
print provider_timestamp, '->', UniqID
|
||||
server.sadd(provider_timestamp, UniqID)
|
||||
server.set(UniqID, avg_score)
|
||||
print avg_score
|
||||
#print UniqID, '->', avg_score
|
||||
else:
|
||||
print 'Dropped:', p_MimeType
|
||||
|
||||
|
||||
#print('{0}: {1}, '.format(k, ss[k]))
|
||||
def isJSON(content):
|
||||
try:
|
||||
json.loads(content)
|
||||
return True
|
||||
|
||||
for k in avg_score:
|
||||
if k == 'compoundPos':
|
||||
avg_score[k] = avg_score[k] / (pos_line if pos_line > 0 else 1)
|
||||
elif k == 'compoundNeg':
|
||||
avg_score[k] = avg_score[k] / (neg_line if neg_line > 0 else 1)
|
||||
else:
|
||||
avg_score[k] = avg_score[k] / len(sentences)
|
||||
|
||||
|
||||
# In redis-levelDB: {} = set, () = K-V
|
||||
# {Provider_set -> provider_i}
|
||||
# {Provider_TimestampInHour_i -> UniqID_i}_j
|
||||
# (UniqID_i -> PasteValue_i)
|
||||
|
||||
server.sadd('Provider_set', provider)
|
||||
#print 'Provider_set', provider
|
||||
|
||||
provider_timestamp = provider + '_' + str(timestamp)
|
||||
#print provider_timestamp
|
||||
server.incr('UniqID')
|
||||
UniqID = server.get('UniqID')
|
||||
print provider_timestamp, '->', UniqID
|
||||
server.sadd(provider_timestamp, UniqID)
|
||||
server.set(UniqID, avg_score)
|
||||
#print UniqID, '->', avg_score
|
||||
|
||||
#print '(', provider, timestamp, str(avg_score) , ')'
|
||||
#server.hset(provider, timestamp, str(avg_score))
|
||||
except Exception,e:
|
||||
return False
|
||||
|
||||
if __name__ == '__main__':
|
||||
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||
|
@ -102,7 +121,7 @@ if __name__ == '__main__':
|
|||
publisher.channel = 'Script'
|
||||
|
||||
# Section name in bin/packages/modules.cfg
|
||||
config_section = 'SentimentAnalyser'
|
||||
config_section = 'SentimentAnalysis'
|
||||
|
||||
# Setup the I/O queues
|
||||
p = Process(config_section)
|
||||
|
|
|
@ -66,8 +66,8 @@ subscribe = Redis_BrowseWarningPaste
|
|||
#subscribe = Redis_Cve
|
||||
#publish = Redis_BrowseWarningPaste
|
||||
|
||||
[SentimentAnalyser]
|
||||
subscribe = Redis_LinesLong
|
||||
[SentimentAnalysis]
|
||||
subscribe = Redis_Global
|
||||
|
||||
[Release]
|
||||
subscribe = Redis_Global
|
||||
|
|
|
@ -497,7 +497,7 @@ def sentiment_analysis_plot_tool_getdata():
|
|||
|
||||
timestamp1 = calendar.timegm(date1.timetuple())
|
||||
timestamp2 = calendar.timegm(date2.timetuple())
|
||||
print timestamp2
|
||||
|
||||
oneHour = 60*60
|
||||
oneDay = oneHour*24
|
||||
|
||||
|
|
|
@ -1,4 +1,26 @@
|
|||
|
||||
/**
 * Build the lookup table used by the "today" sparkline tooltip.
 *
 * Maps each bar offset (0..num) to an hour-of-day label such as "17h".
 * The last offset (num) is the current hour and every earlier offset is one
 * hour further in the past. Hours are wrapped onto the 24-hour clock so the
 * table never contains negative labels (e.g. "-18h") when the window
 * crosses midnight.
 *
 * @param {number} num  Index of the last offset (the table has num+1 entries).
 * @param {Date} [now]  Reference time; defaults to the current local time.
 * @returns {Object}    offset -> "<hour>h"
 */
function generate_offset_to_time(num, now){
    var reference = now || new Date();
    var current_hour = reference.getHours();
    var to_ret = {};
    // `var` keeps the loop index local instead of leaking an implicit global.
    for (var offset = 0; offset <= num; offset++) {
        var hour = current_hour - (num - offset);
        // JavaScript's % keeps the sign of the dividend, hence the double modulo.
        hour = ((hour % 24) + 24) % 24;
        to_ret[offset] = hour + 'h';
    }
    return to_ret;
};
|
||||
|
||||
/**
 * Build the lookup table used by the weekly sparkline tooltip.
 *
 * Maps each hourly bar offset (0 .. day*24-1) to a "<day-of-month>:<hour>h"
 * label. The last offset is the current hour and offset 0 is the oldest
 * hour of the window, matching the order in which the hourly values are
 * pushed into the sparkline data.
 *
 * Labels are derived from real Date arithmetic, so hours wrap correctly at
 * midnight and the day-of-month stays valid across month boundaries
 * (no "0:" or negative days, no negative hours).
 *
 * @param {number} day  Number of days covered by the sparkline.
 * @param {Date} [now]  Reference time; defaults to the current local time.
 * @returns {Object}    offset -> "<day-of-month>:<hour>h"
 */
function generate_offset_to_date(day, now){
    var HOUR_MS = 60 * 60 * 1000;
    var reference_ms = (now || new Date()).getTime();
    var total_hours = day * 24;
    var to_ret = {};
    // `var` keeps the loop index local instead of leaking an implicit global.
    for (var offset = 0; offset < total_hours; offset++) {
        // Number of hours this bar lies before the reference time.
        var hours_back = total_hours - 1 - offset;
        var moment = new Date(reference_ms - hours_back * HOUR_MS);
        to_ret[offset] = moment.getDate() + ":" + moment.getHours() + "h";
    }
    return to_ret;
};
|
||||
|
||||
var offset_to_time = generate_offset_to_time(23);
|
||||
var offset_to_date = generate_offset_to_date(7);
|
||||
|
||||
var sparklineOptions = {
|
||||
height: 80,//Height of the chart - Defaults to 'auto' (line height of the containing tag)
|
||||
|
@ -13,6 +35,7 @@
|
|||
negBarColor: '#f22929',
|
||||
zeroColor: '#ffff00',
|
||||
|
||||
tooltipFormat: '<span style="color: {{color}}">●</span> {{offset:names}}, {{value}} </span>',
|
||||
};
|
||||
|
||||
|
||||
|
@ -37,7 +60,9 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
var spark_data = [];
|
||||
var curr_provider = array_provider[graphNum];
|
||||
var curr_sum = 0.0;
|
||||
var curr_sum_elem = 0.0;
|
||||
var day_sum = 0.0;
|
||||
var day_sum_elem = 0.0;
|
||||
var hour_sum = 0.0;
|
||||
|
||||
for(curr_date=dateStart; curr_date<dateStart+oneWeek; curr_date+=oneHour){
|
||||
|
@ -71,10 +96,12 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
graph_data.push({'neg': neg, 'neu': neu, 'pos': pos, 'compoundPos': compPosAvg, 'compoundNeg': compNegAvg});
|
||||
spark_data.push(pos-neg);
|
||||
curr_sum += (pos-neg);
|
||||
curr_sum_elem++;
|
||||
max_value = Math.abs(pos-neg) > max_value ? Math.abs(pos-neg) : max_value;
|
||||
|
||||
if(curr_date >= dateStart+oneWeek-24*oneHour){
|
||||
day_sum += (pos-neg);
|
||||
day_sum_elem++;
|
||||
}
|
||||
if(curr_date >= dateStart+oneWeek-oneHour){
|
||||
hour_sum += (pos-neg);
|
||||
|
@ -85,7 +112,8 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
all_graph_day_sum += day_sum;
|
||||
all_graph_hour_sum += hour_sum;
|
||||
|
||||
var curr_avg = curr_sum / (oneWeek/oneHour);
|
||||
var curr_avg = curr_sum / (curr_sum_elem);
|
||||
//var curr_avg = curr_sum / (oneWeek/oneHour);
|
||||
//var curr_avg = curr_sum / (spark_data.length);
|
||||
graph_avg.push([curr_provider, curr_avg]);
|
||||
plot_data.push(spark_data);
|
||||
|
@ -94,6 +122,8 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
|
||||
sparklineOptions.chartRangeMax = max_value;
|
||||
sparklineOptions.chartRangeMin = -max_value;
|
||||
sparklineOptions.tooltipValueLookups = { names: offset_to_date};
|
||||
|
||||
// print week
|
||||
var num = graphNum + 1;
|
||||
var placeholder = '.sparkLineStatsWeek' + num;
|
||||
|
@ -102,12 +132,15 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
$(placeholder+'s').text(curr_avg.toFixed(5));
|
||||
|
||||
sparklineOptions.barWidth = 18;
|
||||
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">●</span> Avg: {{value}} </span>'
|
||||
$(placeholder+'b').sparkline([curr_avg], sparklineOptions);
|
||||
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">●</span> {{offset:names}}, {{value}} </span>'
|
||||
sparklineOptions.barWidth = 2;
|
||||
sparklineOptions.tooltipValueLookups = { names: offset_to_time};
|
||||
|
||||
// print today
|
||||
var data_length = plot_data[graphNum].length;
|
||||
var data_today = plot_data[graphNum].slice(data_length-24, data_length-1);
|
||||
var data_today = plot_data[graphNum].slice(data_length-24, data_length);
|
||||
|
||||
placeholder = '.sparkLineStatsToday' + num;
|
||||
sparklineOptions.barWidth = 14;
|
||||
|
@ -115,9 +148,13 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
$(placeholder+'t').text(curr_provider);
|
||||
|
||||
sparklineOptions.barWidth = 18;
|
||||
$(placeholder+'b').sparkline([day_sum/24], sparklineOptions);
|
||||
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">●</span> Avg: {{value}} </span>'
|
||||
//var day_avg = day_sum/24;
|
||||
var day_avg = day_sum/day_sum_elem;
|
||||
$(placeholder+'b').sparkline([day_avg], sparklineOptions);
|
||||
sparklineOptions.tooltipFormat = '<span style="color: {{color}}">●</span> {{offset:names}}, {{value}} </span>'
|
||||
sparklineOptions.barWidth = 2;
|
||||
$(placeholder+'s').text((day_sum/24).toFixed(5));
|
||||
$(placeholder+'s').text((day_avg).toFixed(5));
|
||||
|
||||
}//for loop
|
||||
|
||||
|
@ -153,13 +190,15 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
gaugeOptions.appendTo = '#gauge_today_last_hour';
|
||||
gaugeOptions.dialLabel = 'Last hour';
|
||||
gaugeOptions.elementId = 'gauge1';
|
||||
gaugeOptions.inc = all_graph_hour_sum / 8;
|
||||
var piePercent = (all_graph_hour_sum / 8) / max_value;
|
||||
gaugeOptions.inc = piePercent;
|
||||
var gauge_today_last_hour = new FlexGauge(gaugeOptions);
|
||||
|
||||
gaugeOptions2.appendTo = '#gauge_today_last_days';
|
||||
gaugeOptions2.dialLabel = 'Today';
|
||||
gaugeOptions2.elementId = 'gauge2';
|
||||
gaugeOptions2.inc = all_graph_day_sum / 8;
|
||||
piePercent = (all_graph_day_sum / (8*24)) / max_value;
|
||||
gaugeOptions2.inc = piePercent;
|
||||
var gauge_today_last_days = new FlexGauge(gaugeOptions2);
|
||||
|
||||
gaugeOptions3.appendTo = '#gauge_week';
|
||||
|
@ -167,10 +206,14 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
gaugeOptions3.elementId = 'gauge3';
|
||||
|
||||
var graph_avg_sum = 0.0;
|
||||
for (i=0; i<graph_avg.length; i++)
|
||||
var temp_max_val = 0.0;
|
||||
for (i=0; i<graph_avg.length; i++){
|
||||
graph_avg_sum += graph_avg[i][1];
|
||||
temp_max_val = Math.abs(graph_avg[i][1]) > temp_max_val ? Math.abs(graph_avg[i][1]) : temp_max_val;
|
||||
}
|
||||
|
||||
gaugeOptions3.inc = graph_avg_sum / graph_avg.length;
|
||||
piePercent = (graph_avg_sum / graph_avg.length) / temp_max_val;
|
||||
gaugeOptions3.inc = piePercent;
|
||||
var gauge_today_last_days = new FlexGauge(gaugeOptions3);
|
||||
|
||||
|
||||
|
@ -185,21 +228,24 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
|
||||
/* ----------- CanvasJS ------------ */
|
||||
|
||||
var gauge_data = graph_data.slice(graph_data.length-24*2, graph_data.length-24*1);
|
||||
var comp_sum_day_pos = 0.0;
|
||||
var comp_sum_day_neg = 0.0;
|
||||
var comp_sum_hour_pos = 0.0;
|
||||
var comp_sum_hour_neg = 0.0;
|
||||
for (i=1; i< gauge_data.length; i++){
|
||||
comp_sum_day_pos += gauge_data[i].compoundPos;
|
||||
comp_sum_day_neg += gauge_data[i].compoundNeg;
|
||||
for(graphNum=0; graphNum<8; graphNum++){
|
||||
curr_graphData = all_data[graphNum];
|
||||
var gauge_data = curr_graphData.slice(curr_graphData.length-24, curr_graphData.length);
|
||||
for (i=1; i< gauge_data.length; i++){
|
||||
comp_sum_day_pos += gauge_data[i].compoundPos;
|
||||
comp_sum_day_neg += gauge_data[i].compoundNeg;
|
||||
|
||||
if(i >= 24){
|
||||
comp_sum_hour_pos += gauge_data[i].compoundPos;
|
||||
comp_sum_hour_neg += gauge_data[i].compoundNeg;
|
||||
if(i == 23){
|
||||
comp_sum_hour_pos += gauge_data[i].compoundPos;
|
||||
comp_sum_hour_neg += gauge_data[i].compoundNeg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var options_canvasJS_1 = {
|
||||
|
||||
|
@ -216,20 +262,20 @@ $.getJSON("/sentiment_analysis_getplotdata/",
|
|||
labelFontSize: 0.1,
|
||||
},
|
||||
data: [
|
||||
{
|
||||
type: "bar",
|
||||
color: "green",
|
||||
dataPoints: [
|
||||
{y: comp_sum_hour_pos/8}
|
||||
]
|
||||
},
|
||||
{
|
||||
type: "bar",
|
||||
color: "red",
|
||||
dataPoints: [
|
||||
{y: comp_sum_hour_neg/8}
|
||||
]
|
||||
}
|
||||
{
|
||||
type: "bar",
|
||||
color: "green",
|
||||
dataPoints: [
|
||||
{y: comp_sum_hour_pos/8}
|
||||
]
|
||||
},
|
||||
{
|
||||
type: "bar",
|
||||
color: "red",
|
||||
dataPoints: [
|
||||
{y: comp_sum_hour_neg/8}
|
||||
]
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue