mirror of https://github.com/CIRCL/AIL-framework
Added the option to choose in top_terms whether the data is counted per paste or not (the module still needs testing)
parent
e98e92b0d4
commit
25adae0104
|
@ -117,11 +117,17 @@ if __name__ == "__main__":
|
||||||
r_serv1.hincrby(low_word, date, int(score))
|
r_serv1.hincrby(low_word, date, int(score))
|
||||||
|
|
||||||
# Update redis
|
# Update redis
|
||||||
|
#consider the number of occurrences of this term
|
||||||
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
|
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
|
||||||
|
#1 term per paste
|
||||||
|
curr_word_value_perPaste = int(server_term.hincrby(timestamp, low_word, int(1)))
|
||||||
|
|
||||||
# Add in set only if term is not in the blacklist
|
# Add in set only if term is not in the blacklist
|
||||||
if low_word not in server_term.smembers(BlackListTermsSet_Name):
|
if low_word not in server_term.smembers(BlackListTermsSet_Name):
|
||||||
|
#consider the number of occurrences of this term
|
||||||
server_term.zincrby(curr_set, low_word, float(score))
|
server_term.zincrby(curr_set, low_word, float(score))
|
||||||
|
#1 term per paste
|
||||||
|
server_term.zincrby("per_paste_" + curr_set, low_word, float(score))
|
||||||
|
|
||||||
#Add more info for tracked terms
|
#Add more info for tracked terms
|
||||||
check_if_tracked_term(low_word, filename)
|
check_if_tracked_term(low_word, filename)
|
||||||
|
|
|
@ -48,11 +48,13 @@ def manage_top_set():
|
||||||
blacklist_size = int(server_term.scard(BlackListTermsSet_Name))
|
blacklist_size = int(server_term.scard(BlackListTermsSet_Name))
|
||||||
|
|
||||||
dico = {}
|
dico = {}
|
||||||
|
dico_per_paste = {}
|
||||||
|
|
||||||
# Retrieve top data (max_card + blacklist_size) from days sets
|
# Retrieve top data (max_card + blacklist_size) from days sets
|
||||||
for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
|
for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay):
|
||||||
curr_set = top_termFreq_setName_day[0] + str(timestamp)
|
curr_set = top_termFreq_setName_day[0] + str(timestamp)
|
||||||
array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
|
array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
|
||||||
|
array_top_day_per_paste = server_term.zrevrangebyscore("per_paste_" + curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size)
|
||||||
|
|
||||||
for word, value in array_top_day:
|
for word, value in array_top_day:
|
||||||
if word not in server_term.smembers(BlackListTermsSet_Name):
|
if word not in server_term.smembers(BlackListTermsSet_Name):
|
||||||
|
@ -61,8 +63,16 @@ def manage_top_set():
|
||||||
else:
|
else:
|
||||||
dico[word] = value
|
dico[word] = value
|
||||||
|
|
||||||
|
for word, value in "per_paste_" + array_top_day:
|
||||||
|
if word not in server_term.smembers(BlackListTermsSet_Name):
|
||||||
|
if word in dico_per_paste.keys():
|
||||||
|
dico_per_paste[word] += value
|
||||||
|
else:
|
||||||
|
dico_per_paste[word] = value
|
||||||
|
|
||||||
if timestamp == startDate - num_day_week*oneDay:
|
if timestamp == startDate - num_day_week*oneDay:
|
||||||
dico_week = copy.deepcopy(dico)
|
dico_week = copy.deepcopy(dico)
|
||||||
|
dico_week_per_paste = copy.deepcopy(dico_per_paste)
|
||||||
|
|
||||||
# convert dico into sorted array
|
# convert dico into sorted array
|
||||||
array_month = []
|
array_month = []
|
||||||
|
@ -77,17 +87,37 @@ def manage_top_set():
|
||||||
array_week.sort(key=lambda tup: -tup[1])
|
array_week.sort(key=lambda tup: -tup[1])
|
||||||
array_week = array_week[0:20]
|
array_week = array_week[0:20]
|
||||||
|
|
||||||
|
# convert dico_per_paste into sorted array
|
||||||
|
array_month_per_paste = []
|
||||||
|
for w, v in dico_per_paste.iteritems():
|
||||||
|
array_month_per_paste.append((w, v))
|
||||||
|
array_month_per_paste.sort(key=lambda tup: -tup[1])
|
||||||
|
array_month_per_paste = array_month_per_paste[0:20]
|
||||||
|
|
||||||
|
array_week_per_paste = []
|
||||||
|
for w, v in dico_week_per_paste.iteritems():
|
||||||
|
array_week_per_paste.append((w, v))
|
||||||
|
array_week_per_paste.sort(key=lambda tup: -tup[1])
|
||||||
|
array_week_per_paste = array_week_per_paste[0:20]
|
||||||
|
|
||||||
|
|
||||||
# remove every term from the top sets
|
# remove every term from the top sets
|
||||||
for curr_set, curr_num_day in top_termFreq_set_array[1:3]:
|
for curr_set, curr_num_day in top_termFreq_set_array[1:3]:
|
||||||
for w in server_term.zrange(curr_set, 0, -1):
|
for w in server_term.zrange(curr_set, 0, -1):
|
||||||
server_term.zrem(curr_set, w)
|
server_term.zrem(curr_set, w)
|
||||||
|
for w in server_term.zrange("per_paste_" + curr_set, 0, -1):
|
||||||
|
server_term.zrem("per_paste_" + curr_set, w)
|
||||||
|
|
||||||
# Add the top terms from the sorted arrays to their respective sorted sets
|
# Add the top terms from the sorted arrays to their respective sorted sets
|
||||||
for elem in array_week:
|
for elem in array_week:
|
||||||
server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0])
|
server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0])
|
||||||
|
for elem in array_week_per_paste:
|
||||||
|
server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0])
|
||||||
|
|
||||||
for elem in array_month:
|
for elem in array_month:
|
||||||
server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0])
|
server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0])
|
||||||
|
for elem in array_month_per_paste:
|
||||||
|
server_term.zadd("per_paste_" + top_termFreq_setName_month[0], float(elem[1]), elem[0])
|
||||||
|
|
||||||
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
|
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
|
||||||
value = str(timestamp) + ", " + "-"
|
value = str(timestamp) + ", " + "-"
|
||||||
|
|
|
@ -196,7 +196,9 @@ def terms_plot_tool_data():
|
||||||
|
|
||||||
@app.route("/terms_plot_top/")
|
@app.route("/terms_plot_top/")
|
||||||
def terms_plot_top():
|
def terms_plot_top():
|
||||||
return render_template("terms_plot_top.html")
|
per_paste = request.args.get('per_paste')
|
||||||
|
per_paste = per_paste if per_paste is not None else 1
|
||||||
|
return render_template("terms_plot_top.html", per_paste=per_paste)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/terms_plot_top_data/")
|
@app.route("/terms_plot_top_data/")
|
||||||
|
@ -212,11 +214,15 @@ def terms_plot_top_data():
|
||||||
|
|
||||||
the_set = request.args.get('set')
|
the_set = request.args.get('set')
|
||||||
num_day = int(request.args.get('num_day'))
|
num_day = int(request.args.get('num_day'))
|
||||||
|
per_paste = int(request.args.get('per_paste'))
|
||||||
|
if per_paste == 1:
|
||||||
|
the_set = "per_paste_" + the_set
|
||||||
|
|
||||||
if the_set is None:
|
if the_set is None:
|
||||||
return "None"
|
return "None"
|
||||||
else:
|
else:
|
||||||
to_return = []
|
to_return = []
|
||||||
if the_set == "TopTermFreq_set_day":
|
if "TopTermFreq_set_day" in the_set:
|
||||||
the_set += "_" + str(today_timestamp)
|
the_set += "_" + str(today_timestamp)
|
||||||
|
|
||||||
for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20):
|
for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20):
|
||||||
|
|
|
@ -67,6 +67,9 @@
|
||||||
<div class="col-lg-12">
|
<div class="col-lg-12">
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-lg-12">
|
<div class="col-lg-12">
|
||||||
|
<form>
|
||||||
|
<input type="checkbox" id="per_paste" value="per_paste" onclick="reload_per_paste()"> 1 term per paste
|
||||||
|
</form>
|
||||||
<div id="panel-today" class="panel panel-info">
|
<div id="panel-today" class="panel panel-info">
|
||||||
<div class="panel-heading">
|
<div class="panel-heading">
|
||||||
<strong>Today</strong>
|
<strong>Today</strong>
|
||||||
|
@ -281,7 +284,15 @@
|
||||||
$(document).ready(function(){
|
$(document).ready(function(){
|
||||||
activePage = $('h1.page-header').attr('data-page');
|
activePage = $('h1.page-header').attr('data-page');
|
||||||
$("#"+activePage).addClass("active");
|
$("#"+activePage).addClass("active");
|
||||||
|
if({{ per_paste }} == 1) {
|
||||||
|
$("#per_paste").attr('checked', true)
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
function reload_per_paste() {
|
||||||
|
var checked = $("#per_paste").prop( "checked" ) ? 1 : 0;
|
||||||
|
window.location.href = {{ url_for('terms_plot_top') }}+"?per_paste="+checked;
|
||||||
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
|
||||||
|
@ -332,6 +343,7 @@ set_today = "TopTermFreq_set_day";
|
||||||
set_week = "TopTermFreq_set_week";
|
set_week = "TopTermFreq_set_week";
|
||||||
set_month = "TopTermFreq_set_month";
|
set_month = "TopTermFreq_set_month";
|
||||||
default_num_curves = 8;
|
default_num_curves = 8;
|
||||||
|
per_paste = {{ per_paste }}
|
||||||
|
|
||||||
var plot_today;
|
var plot_today;
|
||||||
var plot_week;
|
var plot_week;
|
||||||
|
@ -339,7 +351,7 @@ var plot_month;
|
||||||
|
|
||||||
var promises = []; // Used to know when everything has been received
|
var promises = []; // Used to know when everything has been received
|
||||||
|
|
||||||
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today, num_day: 5 }, function(data, status){
|
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today, num_day: 5, per_paste: per_paste }, function(data, status){
|
||||||
data.sort(function(a, b){return b[2]-a[2];});
|
data.sort(function(a, b){return b[2]-a[2];});
|
||||||
// Sort data
|
// Sort data
|
||||||
var table_today = $("#table-today")
|
var table_today = $("#table-today")
|
||||||
|
@ -380,7 +392,7 @@ promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today
|
||||||
|
|
||||||
}));
|
}));
|
||||||
|
|
||||||
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week, num_day: 7 }, function(data, status){
|
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week, num_day: 7, per_paste: per_paste }, function(data, status){
|
||||||
data.sort(function(a, b){return b[2]-a[2];});
|
data.sort(function(a, b){return b[2]-a[2];});
|
||||||
// Sort data
|
// Sort data
|
||||||
var table = $("#table-week")
|
var table = $("#table-week")
|
||||||
|
@ -420,7 +432,7 @@ promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week,
|
||||||
});
|
});
|
||||||
}));
|
}));
|
||||||
|
|
||||||
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_month, num_day: 31 }, function(data, status){
|
promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_month, num_day: 31, per_paste: per_paste }, function(data, status){
|
||||||
data.sort(function(a, b){return b[2]-a[2];});
|
data.sort(function(a, b){return b[2]-a[2];});
|
||||||
// Sort data
|
// Sort data
|
||||||
var table = $("#table-month")
|
var table = $("#table-month")
|
||||||
|
|
Loading…
Reference in New Issue