mirror of https://github.com/CIRCL/AIL-framework
Removed commented code
parent
27ae577e00
commit
2fd2c8af39
44
bin/Curve.py
44
bin/Curve.py
|
@ -1,9 +1,6 @@
|
|||
#!/usr/bin/env python2
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
The ZMQ_Sub_Curve Module
|
||||
============================
|
||||
|
||||
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
|
||||
|
||||
This module updates a .csv file used to draw curves representing selected
|
||||
|
@ -15,11 +12,9 @@ words and their occurency per day.
|
|||
the same Subscriber name in both of them.
|
||||
|
||||
|
||||
This Module is also used for term frequency.
|
||||
|
||||
|
||||
zrank for each day
|
||||
week -> top zrank for each day
|
||||
|
||||
/!\ Top set management is done in the module Curve_manage_top_set
|
||||
|
||||
|
||||
Requirements
|
||||
|
@ -117,48 +112,21 @@ if __name__ == "__main__":
|
|||
|
||||
|
||||
low_word = word.lower()
|
||||
#Old curve
|
||||
#Old curve with words in file
|
||||
r_serv1.hincrby(low_word, date, int(score))
|
||||
|
||||
|
||||
# Update redis
|
||||
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
|
||||
|
||||
# Add in set only if term is not in the blacklist
|
||||
if low_word not in server_term.smembers(BlackListTermsSet_Name):
|
||||
server_term.zincrby(curr_set, low_word, float(score))
|
||||
|
||||
#Add more info for tracked terms
|
||||
check_if_tracked_term(low_word, filename)
|
||||
|
||||
# Manage Top set is done in module Curve_manage_top_sets
|
||||
|
||||
'''
|
||||
if server_term.scard(curr_set) < top_term_freq_max_set_cardinality:
|
||||
server_term.sadd(curr_set, low_word)
|
||||
elif server_term.sismember(curr_set, low_word):
|
||||
continue
|
||||
|
||||
else:
|
||||
|
||||
|
||||
#timer = time.clock()
|
||||
curr_word_value = getValueOverRange(low_word, timestamp, curr_num_day)
|
||||
#print 'curr_range', time.clock() - timer
|
||||
top_termFreq = server_term.smembers(curr_set)
|
||||
sorted_top_termFreq_set = []
|
||||
#timer = time.clock()
|
||||
for word in top_termFreq:
|
||||
word_value = getValueOverRange(word, timestamp, curr_num_day)
|
||||
sorted_top_termFreq_set.append((word, word_value))
|
||||
|
||||
sorted_top_termFreq_set.sort(key=lambda tup: tup[1])
|
||||
#print 'whole_range', time.clock() - timer
|
||||
|
||||
if curr_word_value > int(sorted_top_termFreq_set[0][1]):
|
||||
print str(curr_num_day)+':', low_word, curr_word_value, '\t', sorted_top_termFreq_set[0][0], sorted_top_termFreq_set[0][1], '\t', curr_word_value > sorted_top_termFreq_set[0][1]
|
||||
server_term.srem(curr_set, sorted_top_termFreq_set[0][0])
|
||||
server_term.sadd(curr_set, low_word)
|
||||
'''
|
||||
else:
|
||||
|
||||
if generate_new_graph:
|
||||
generate_new_graph = False
|
||||
print 'Building graph'
|
||||
|
|
|
@ -2,10 +2,8 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
|
||||
|
||||
|
||||
zrank for each day
|
||||
week -> top zrank for each day
|
||||
This module manages the top sets for term frequency.
|
||||
Every 'Refresh_rate' seconds, the weekly and monthly sets are updated.
|
||||
|
||||
|
||||
Requirements
|
||||
|
@ -22,13 +20,13 @@ import time
|
|||
import copy
|
||||
from pubsublogger import publisher
|
||||
from packages import lib_words
|
||||
import os
|
||||
import datetime
|
||||
import calendar
|
||||
|
||||
from Helper import Process
|
||||
|
||||
# Config Variables
|
||||
Refresh_rate = 60*5 #sec
|
||||
BlackListTermsSet_Name = "BlackListSetTermSet"
|
||||
TrackedTermsSet_Name = "TrackedSetTermSet"
|
||||
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
|
||||
|
@ -91,7 +89,6 @@ def manage_top_set():
|
|||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||
# Port of the redis instance used by pubsublogger
|
||||
|
@ -107,7 +104,6 @@ if __name__ == '__main__':
|
|||
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
|
||||
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
|
||||
|
||||
# FUNCTIONS #
|
||||
publisher.info("Script Curve_manage_top_set started")
|
||||
|
||||
# Send a description of the module to the logger
|
||||
|
@ -116,15 +112,12 @@ if __name__ == '__main__':
|
|||
manage_top_set()
|
||||
|
||||
while True:
|
||||
# Get one message from the input queue
|
||||
# Get one message from the input queue (the module only works if it is linked to a queue)
|
||||
message = p.get_from_set()
|
||||
if message is None:
|
||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
||||
print 'sleeping'
|
||||
time.sleep(60) # sleep a long time then manage the set
|
||||
time.sleep(Refresh_rate) # sleep a long time then manage the set
|
||||
manage_top_set()
|
||||
continue
|
||||
|
||||
# Do something with the message from the queue
|
||||
#manage_top_set()
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env python2
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
Template for new modules
|
||||
This module makes statistics for some modules and providers
|
||||
|
||||
"""
|
||||
|
||||
import time
|
||||
|
@ -56,29 +57,6 @@ def compute_most_posted(server, message):
|
|||
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
||||
print redis_progression_name_set
|
||||
|
||||
# if keyword in server.smembers(redis_progression_name_set): # if it is already in the set
|
||||
# return
|
||||
#
|
||||
# if (server.scard(redis_progression_name_set) < max_set_cardinality):
|
||||
# server.sadd(redis_progression_name_set, keyword)
|
||||
|
||||
# else: #not in the set
|
||||
# #Check value for all members
|
||||
# member_set = []
|
||||
# for keyw in server.smembers(redis_progression_name_set):
|
||||
# keyw_value = server.hget(paste_date, module+'-'+keyw)
|
||||
# if keyw_value is not None:
|
||||
# member_set.append((keyw, int(keyw_value)))
|
||||
# else: #No data for this set for today
|
||||
# member_set.append((keyw, int(0)))
|
||||
# member_set.sort(key=lambda tup: tup[1])
|
||||
# if len(member_set) > 0:
|
||||
# if member_set[0][1] < keyword_total_sum:
|
||||
# #remove min from set and add the new one
|
||||
# print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
|
||||
# server.srem(redis_progression_name_set, member_set[0][0])
|
||||
# server.sadd(redis_progression_name_set, keyword)
|
||||
|
||||
|
||||
def compute_provider_info(server, path):
|
||||
redis_all_provider = 'all_provider_set'
|
||||
|
@ -100,22 +78,6 @@ def compute_provider_info(server, path):
|
|||
new_avg = float(sum_size) / float(num_paste)
|
||||
server.hset(paste_provider +'_avg', paste_date, new_avg)
|
||||
|
||||
'''
|
||||
prev_num_paste = server.hget(paste_provider+'_num', paste_date)
|
||||
if prev_num_paste is not None:
|
||||
ok = server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1)
|
||||
prev_sum_size = server.hget(paste_provider+'_size', paste_date)
|
||||
|
||||
if prev_sum_size is not None:
|
||||
ok = server.hset(paste_provider+'_size', paste_date, float(prev_sum_size)+paste_size)
|
||||
new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1)
|
||||
else:
|
||||
ok = server.hset(paste_provider+'_size', paste_date, paste_size)
|
||||
|
||||
else:
|
||||
ok = server.hset(paste_provider+'_num', paste_date, 1)
|
||||
prev_num_paste = 0
|
||||
'''
|
||||
|
||||
#
|
||||
# Compute Most Posted
|
||||
|
@ -136,28 +98,6 @@ def compute_provider_info(server, path):
|
|||
server.zrem(redis_avg_size_name_set, member_set[0][0])
|
||||
server.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
||||
|
||||
'''
|
||||
if paste_provider not in server.smembers(redis_avg_size_name_set): # if it is already in the set
|
||||
if (server.scard(redis_avg_size_name_set) < max_set_cardinality):
|
||||
server.sadd(redis_avg_size_name_set, paste_provider)
|
||||
|
||||
else: #set full capacity
|
||||
#Check value for all members
|
||||
member_set = []
|
||||
for provider in server.smembers(redis_avg_size_name_set):
|
||||
curr_avg = 0.0
|
||||
curr_size = server.hget(provider+'_size', paste_date)
|
||||
curr_num = server.hget(provider+'_num', paste_date)
|
||||
if (curr_size is not None) and (curr_num is not None):
|
||||
curr_avg = float(curr_size) / float(curr_num)
|
||||
member_set.append((provider, curr_avg))
|
||||
member_set.sort(key=lambda tup: tup[1])
|
||||
if member_set[0][1] < new_avg:
|
||||
#remove min from set and add the new one
|
||||
print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
|
||||
server.srem(redis_avg_size_name_set, member_set[0][0])
|
||||
server.sadd(redis_avg_size_name_set, paste_provider)
|
||||
'''
|
||||
|
||||
# Num
|
||||
# if set not full or provider already present
|
||||
|
@ -172,27 +112,6 @@ def compute_provider_info(server, path):
|
|||
server.zrem(member_set[0][0])
|
||||
server.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
||||
|
||||
'''
|
||||
if paste_provider not in server.smembers(redis_providers_name_set): # if it is already in the set
|
||||
if (server.scard(redis_providers_name_set) < max_set_cardinality):
|
||||
server.sadd(redis_providers_name_set, paste_provider)
|
||||
|
||||
else: #set full capacity
|
||||
#Check value for all members
|
||||
member_set = []
|
||||
for provider in server.smembers(redis_providers_name_set):
|
||||
curr_num = 0
|
||||
curr_num = server.hget(provider+'_num', paste_date)
|
||||
if curr_num is not None:
|
||||
member_set.append((provider, int(curr_num)))
|
||||
member_set.sort(key=lambda tup: tup[1])
|
||||
if len(member_set) > 0:
|
||||
if member_set[0][1] < int(prev_num_paste)+1:
|
||||
#remove min from set and add the new one
|
||||
print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
|
||||
server.srem(redis_providers_name_set, member_set[0][0])
|
||||
server.sadd(redis_providers_name_set, paste_provider)
|
||||
'''
|
||||
|
||||
if __name__ == '__main__':
|
||||
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||
|
|
|
@ -173,6 +173,7 @@ def showpaste(content_range):
|
|||
if content_range != 0:
|
||||
p_content = p_content[0:content_range]
|
||||
|
||||
|
||||
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list)
|
||||
|
||||
def getPastebyType(server, module_name):
|
||||
|
@ -205,20 +206,6 @@ def get_top_relevant_data(server, module_name):
|
|||
member_set.insert(0, ("passed_days", days))
|
||||
return member_set
|
||||
|
||||
# member_set = []
|
||||
# for keyw in server.smembers(redis_progression_name_set):
|
||||
# redis_progression_name = module_name+'-'+keyw
|
||||
# keyw_value = server.hget(date ,redis_progression_name)
|
||||
# keyw_value = keyw_value if keyw_value is not None else 0
|
||||
# member_set.append((keyw, int(keyw_value)))
|
||||
# member_set.sort(key=lambda tup: tup[1], reverse=True)
|
||||
# if member_set[0][1] == 0: #No data for this date
|
||||
# days += 1
|
||||
# continue
|
||||
# else:
|
||||
# member_set.insert(0, ("passed_days", days))
|
||||
# return member_set
|
||||
|
||||
|
||||
def Term_getValueOverRange(word, startDate, num_day):
|
||||
passed_days = 0
|
||||
|
@ -357,34 +344,6 @@ def providersChart():
|
|||
return jsonify(member_set)
|
||||
|
||||
|
||||
'''
|
||||
# Iterate over element in top_x_set and retreive their value
|
||||
member_set = []
|
||||
for keyw in r_serv_charts.smembers(redis_provider_name_set):
|
||||
redis_provider_name_size = keyw+'_'+'size'
|
||||
redis_provider_name_num = keyw+'_'+'num'
|
||||
keyw_value_size = r_serv_charts.hget(redis_provider_name_size, get_date_range(0)[0])
|
||||
keyw_value_size = keyw_value_size if keyw_value_size is not None else 0.0
|
||||
keyw_value_num = r_serv_charts.hget(redis_provider_name_num, get_date_range(0)[0])
|
||||
|
||||
if keyw_value_num is not None:
|
||||
keyw_value_num = int(keyw_value_num)
|
||||
else:
|
||||
if module_name == "size":
|
||||
keyw_value_num = 10000000000
|
||||
else:
|
||||
keyw_value_num = 0
|
||||
if module_name == "size":
|
||||
member_set.append((keyw, float(keyw_value_size)/float(keyw_value_num)))
|
||||
else:
|
||||
member_set.append((keyw, float(keyw_value_num)))
|
||||
|
||||
member_set.sort(key=lambda tup: tup[1], reverse=True)
|
||||
if len(member_set) == 0:
|
||||
member_set.append(("No relevant data", float(100)))
|
||||
return jsonify(member_set)
|
||||
'''
|
||||
|
||||
|
||||
@app.route("/search", methods=['POST'])
|
||||
def search():
|
||||
|
@ -771,50 +730,6 @@ def terms_plot_top_data():
|
|||
|
||||
return jsonify(to_return)
|
||||
|
||||
'''
|
||||
to_return = []
|
||||
for term in r_serv_term.smembers(the_set):
|
||||
value_range = []
|
||||
tot_sum = 0
|
||||
for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
|
||||
value = r_serv_term.hget(timestamp, term)
|
||||
curr_value_range = int(value) if value is not None else 0
|
||||
tot_sum += curr_value_range
|
||||
value_range.append([timestamp, curr_value_range])
|
||||
|
||||
to_return.append([term, value_range, tot_sum])
|
||||
|
||||
return jsonify(to_return)
|
||||
'''
|
||||
|
||||
|
||||
@app.route("/test/") #completely shows the paste in a new tab
|
||||
def test():
|
||||
|
||||
server = r_serv_term
|
||||
array1 = []
|
||||
for w in server.smembers('TopTermFreq_set_day'):
|
||||
val = server.hget('1471564800', w)
|
||||
val = val if val is not None else 0
|
||||
val2 = server.hget('1471478400', w)
|
||||
val2 = val2 if val2 is not None else 0
|
||||
array1.append((w, (int(val), int(val2))))
|
||||
|
||||
# array2 = []
|
||||
# for w in server.smembers('TopTermFreq_set_week'):
|
||||
# array2.append((w, int(server.hget('1471478400', w))))
|
||||
|
||||
array1.sort(key=lambda tup: tup[1][0])
|
||||
stri = "<h1> day </h1>"
|
||||
for e in array1:
|
||||
stri += "<p>"+ e[0] + "\t" + str(e[1]) +"</p>"
|
||||
# stri += "<h1> week </h1>"
|
||||
# for e in array2:
|
||||
# stri += "<p>"+ e[0] + "\t" + str(e[1]) +"</p>"
|
||||
|
||||
print stri
|
||||
return stri
|
||||
|
||||
|
||||
|
||||
@app.route("/showsavedpaste/") #completely shows the paste in a new tab
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
* Version: 1.0
|
||||
* Author: Jeff Millies
|
||||
* Author URI:
|
||||
*
|
||||
* Slight modification for better display in Sentiment webpages
|
||||
*/
|
||||
(function ($) {
|
||||
var FlexGauge = function (o) {
|
||||
|
|
|
@ -115,7 +115,6 @@ $(document).ready(function(){
|
|||
$('#myTable').on( 'draw.dt', function () {
|
||||
// On click, get html content from url and update the corresponding modal
|
||||
$("[data-toggle='modal']").unbind('click.openmodal').on("click.openmodal", function (event) {
|
||||
console.log('hi');
|
||||
event.preventDefault();
|
||||
var modal=$(this);
|
||||
var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num');
|
||||
|
|
Loading…
Reference in New Issue