mirror of https://github.com/CIRCL/AIL-framework
Removed commented code
parent
27ae577e00
commit
2fd2c8af39
44
bin/Curve.py
44
bin/Curve.py
|
@ -1,9 +1,6 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
"""
|
"""
|
||||||
The ZMQ_Sub_Curve Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
|
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
|
||||||
|
|
||||||
This module updates a .csv file used to draw curves representing selected
|
This module updates a .csv file used to draw curves representing selected
|
||||||
|
@ -15,11 +12,9 @@ words and their occurency per day.
|
||||||
the same Subscriber name in both of them.
|
the same Subscriber name in both of them.
|
||||||
|
|
||||||
|
|
||||||
|
This Module is also used for term frequency.
|
||||||
|
|
||||||
|
/!\ Top set management is done in the module Curve_manage_top_set
|
||||||
zrank for each day
|
|
||||||
week -> top zrank for each day
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Requirements
|
Requirements
|
||||||
|
@ -117,48 +112,21 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
|
|
||||||
low_word = word.lower()
|
low_word = word.lower()
|
||||||
#Old curve
|
#Old curve with words in file
|
||||||
r_serv1.hincrby(low_word, date, int(score))
|
r_serv1.hincrby(low_word, date, int(score))
|
||||||
|
|
||||||
|
|
||||||
# Update redis
|
# Update redis
|
||||||
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
|
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
|
||||||
|
|
||||||
|
# Add in set only if term is not in the blacklist
|
||||||
if low_word not in server_term.smembers(BlackListTermsSet_Name):
|
if low_word not in server_term.smembers(BlackListTermsSet_Name):
|
||||||
server_term.zincrby(curr_set, low_word, float(score))
|
server_term.zincrby(curr_set, low_word, float(score))
|
||||||
|
|
||||||
#Add more info for tracked terms
|
#Add more info for tracked terms
|
||||||
check_if_tracked_term(low_word, filename)
|
check_if_tracked_term(low_word, filename)
|
||||||
|
|
||||||
# Manage Top set is done in module Curve_manage_top_sets
|
|
||||||
|
|
||||||
'''
|
|
||||||
if server_term.scard(curr_set) < top_term_freq_max_set_cardinality:
|
|
||||||
server_term.sadd(curr_set, low_word)
|
|
||||||
elif server_term.sismember(curr_set, low_word):
|
|
||||||
continue
|
|
||||||
|
|
||||||
else:
|
|
||||||
|
|
||||||
|
|
||||||
#timer = time.clock()
|
|
||||||
curr_word_value = getValueOverRange(low_word, timestamp, curr_num_day)
|
|
||||||
#print 'curr_range', time.clock() - timer
|
|
||||||
top_termFreq = server_term.smembers(curr_set)
|
|
||||||
sorted_top_termFreq_set = []
|
|
||||||
#timer = time.clock()
|
|
||||||
for word in top_termFreq:
|
|
||||||
word_value = getValueOverRange(word, timestamp, curr_num_day)
|
|
||||||
sorted_top_termFreq_set.append((word, word_value))
|
|
||||||
|
|
||||||
sorted_top_termFreq_set.sort(key=lambda tup: tup[1])
|
|
||||||
#print 'whole_range', time.clock() - timer
|
|
||||||
|
|
||||||
if curr_word_value > int(sorted_top_termFreq_set[0][1]):
|
|
||||||
print str(curr_num_day)+':', low_word, curr_word_value, '\t', sorted_top_termFreq_set[0][0], sorted_top_termFreq_set[0][1], '\t', curr_word_value > sorted_top_termFreq_set[0][1]
|
|
||||||
server_term.srem(curr_set, sorted_top_termFreq_set[0][0])
|
|
||||||
server_term.sadd(curr_set, low_word)
|
|
||||||
'''
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
if generate_new_graph:
|
if generate_new_graph:
|
||||||
generate_new_graph = False
|
generate_new_graph = False
|
||||||
print 'Building graph'
|
print 'Building graph'
|
||||||
|
|
|
@ -2,10 +2,8 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
This module manages the top sets for term frequency.
|
||||||
|
Every 'Refresh_rate' seconds, update the weekly and monthly sets
|
||||||
zrank for each day
|
|
||||||
week -> top zrank for each day
|
|
||||||
|
|
||||||
|
|
||||||
Requirements
|
Requirements
|
||||||
|
@ -22,13 +20,13 @@ import time
|
||||||
import copy
|
import copy
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import lib_words
|
from packages import lib_words
|
||||||
import os
|
|
||||||
import datetime
|
import datetime
|
||||||
import calendar
|
import calendar
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
# Config Variables
|
# Config Variables
|
||||||
|
Refresh_rate = 60*5 #sec
|
||||||
BlackListTermsSet_Name = "BlackListSetTermSet"
|
BlackListTermsSet_Name = "BlackListSetTermSet"
|
||||||
TrackedTermsSet_Name = "TrackedSetTermSet"
|
TrackedTermsSet_Name = "TrackedSetTermSet"
|
||||||
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
|
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
|
||||||
|
@ -91,7 +89,6 @@ def manage_top_set():
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||||
# Port of the redis instance used by pubsublogger
|
# Port of the redis instance used by pubsublogger
|
||||||
|
@ -107,7 +104,6 @@ if __name__ == '__main__':
|
||||||
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
|
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
|
||||||
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
|
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
|
||||||
|
|
||||||
# FUNCTIONS #
|
|
||||||
publisher.info("Script Curve_manage_top_set started")
|
publisher.info("Script Curve_manage_top_set started")
|
||||||
|
|
||||||
# Sent to the logging a description of the module
|
# Sent to the logging a description of the module
|
||||||
|
@ -116,15 +112,12 @@ if __name__ == '__main__':
|
||||||
manage_top_set()
|
manage_top_set()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# Get one message from the input queue
|
# Get one message from the input queue (module only works if linked with a queue)
|
||||||
message = p.get_from_set()
|
message = p.get_from_set()
|
||||||
if message is None:
|
if message is None:
|
||||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
publisher.debug("{} queue is empty, waiting".format(config_section))
|
||||||
print 'sleeping'
|
print 'sleeping'
|
||||||
time.sleep(60) # sleep a long time then manage the set
|
time.sleep(Refresh_rate) # sleep a long time then manage the set
|
||||||
manage_top_set()
|
manage_top_set()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Do something with the message from the queue
|
|
||||||
#manage_top_set()
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
"""
|
"""
|
||||||
Template for new modules
|
This module makes statistics for some modules and providers
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import time
|
import time
|
||||||
|
@ -56,29 +57,6 @@ def compute_most_posted(server, message):
|
||||||
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
|
||||||
print redis_progression_name_set
|
print redis_progression_name_set
|
||||||
|
|
||||||
# if keyword in server.smembers(redis_progression_name_set): # if it is already in the set
|
|
||||||
# return
|
|
||||||
#
|
|
||||||
# if (server.scard(redis_progression_name_set) < max_set_cardinality):
|
|
||||||
# server.sadd(redis_progression_name_set, keyword)
|
|
||||||
|
|
||||||
# else: #not in the set
|
|
||||||
# #Check value for all members
|
|
||||||
# member_set = []
|
|
||||||
# for keyw in server.smembers(redis_progression_name_set):
|
|
||||||
# keyw_value = server.hget(paste_date, module+'-'+keyw)
|
|
||||||
# if keyw_value is not None:
|
|
||||||
# member_set.append((keyw, int(keyw_value)))
|
|
||||||
# else: #No data for this set for today
|
|
||||||
# member_set.append((keyw, int(0)))
|
|
||||||
# member_set.sort(key=lambda tup: tup[1])
|
|
||||||
# if len(member_set) > 0:
|
|
||||||
# if member_set[0][1] < keyword_total_sum:
|
|
||||||
# #remove min from set and add the new one
|
|
||||||
# print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
|
|
||||||
# server.srem(redis_progression_name_set, member_set[0][0])
|
|
||||||
# server.sadd(redis_progression_name_set, keyword)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_provider_info(server, path):
|
def compute_provider_info(server, path):
|
||||||
redis_all_provider = 'all_provider_set'
|
redis_all_provider = 'all_provider_set'
|
||||||
|
@ -100,22 +78,6 @@ def compute_provider_info(server, path):
|
||||||
new_avg = float(sum_size) / float(num_paste)
|
new_avg = float(sum_size) / float(num_paste)
|
||||||
server.hset(paste_provider +'_avg', paste_date, new_avg)
|
server.hset(paste_provider +'_avg', paste_date, new_avg)
|
||||||
|
|
||||||
'''
|
|
||||||
prev_num_paste = server.hget(paste_provider+'_num', paste_date)
|
|
||||||
if prev_num_paste is not None:
|
|
||||||
ok = server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1)
|
|
||||||
prev_sum_size = server.hget(paste_provider+'_size', paste_date)
|
|
||||||
|
|
||||||
if prev_sum_size is not None:
|
|
||||||
ok = server.hset(paste_provider+'_size', paste_date, float(prev_sum_size)+paste_size)
|
|
||||||
new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1)
|
|
||||||
else:
|
|
||||||
ok = server.hset(paste_provider+'_size', paste_date, paste_size)
|
|
||||||
|
|
||||||
else:
|
|
||||||
ok = server.hset(paste_provider+'_num', paste_date, 1)
|
|
||||||
prev_num_paste = 0
|
|
||||||
'''
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Compute Most Posted
|
# Compute Most Posted
|
||||||
|
@ -136,28 +98,6 @@ def compute_provider_info(server, path):
|
||||||
server.zrem(redis_avg_size_name_set, member_set[0][0])
|
server.zrem(redis_avg_size_name_set, member_set[0][0])
|
||||||
server.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
server.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
|
||||||
|
|
||||||
'''
|
|
||||||
if paste_provider not in server.smembers(redis_avg_size_name_set): # if it is already in the set
|
|
||||||
if (server.scard(redis_avg_size_name_set) < max_set_cardinality):
|
|
||||||
server.sadd(redis_avg_size_name_set, paste_provider)
|
|
||||||
|
|
||||||
else: #set full capacity
|
|
||||||
#Check value for all members
|
|
||||||
member_set = []
|
|
||||||
for provider in server.smembers(redis_avg_size_name_set):
|
|
||||||
curr_avg = 0.0
|
|
||||||
curr_size = server.hget(provider+'_size', paste_date)
|
|
||||||
curr_num = server.hget(provider+'_num', paste_date)
|
|
||||||
if (curr_size is not None) and (curr_num is not None):
|
|
||||||
curr_avg = float(curr_size) / float(curr_num)
|
|
||||||
member_set.append((provider, curr_avg))
|
|
||||||
member_set.sort(key=lambda tup: tup[1])
|
|
||||||
if member_set[0][1] < new_avg:
|
|
||||||
#remove min from set and add the new one
|
|
||||||
print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
|
|
||||||
server.srem(redis_avg_size_name_set, member_set[0][0])
|
|
||||||
server.sadd(redis_avg_size_name_set, paste_provider)
|
|
||||||
'''
|
|
||||||
|
|
||||||
# Num
|
# Num
|
||||||
# if set not full or provider already present
|
# if set not full or provider already present
|
||||||
|
@ -172,27 +112,6 @@ def compute_provider_info(server, path):
|
||||||
server.zrem(member_set[0][0])
|
server.zrem(member_set[0][0])
|
||||||
server.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
server.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
||||||
|
|
||||||
'''
|
|
||||||
if paste_provider not in server.smembers(redis_providers_name_set): # if it is already in the set
|
|
||||||
if (server.scard(redis_providers_name_set) < max_set_cardinality):
|
|
||||||
server.sadd(redis_providers_name_set, paste_provider)
|
|
||||||
|
|
||||||
else: #set full capacity
|
|
||||||
#Check value for all members
|
|
||||||
member_set = []
|
|
||||||
for provider in server.smembers(redis_providers_name_set):
|
|
||||||
curr_num = 0
|
|
||||||
curr_num = server.hget(provider+'_num', paste_date)
|
|
||||||
if curr_num is not None:
|
|
||||||
member_set.append((provider, int(curr_num)))
|
|
||||||
member_set.sort(key=lambda tup: tup[1])
|
|
||||||
if len(member_set) > 0:
|
|
||||||
if member_set[0][1] < int(prev_num_paste)+1:
|
|
||||||
#remove min from set and add the new one
|
|
||||||
print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
|
|
||||||
server.srem(redis_providers_name_set, member_set[0][0])
|
|
||||||
server.sadd(redis_providers_name_set, paste_provider)
|
|
||||||
'''
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||||
|
|
|
@ -173,6 +173,7 @@ def showpaste(content_range):
|
||||||
if content_range != 0:
|
if content_range != 0:
|
||||||
p_content = p_content[0:content_range]
|
p_content = p_content[0:content_range]
|
||||||
|
|
||||||
|
|
||||||
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list)
|
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list)
|
||||||
|
|
||||||
def getPastebyType(server, module_name):
|
def getPastebyType(server, module_name):
|
||||||
|
@ -205,20 +206,6 @@ def get_top_relevant_data(server, module_name):
|
||||||
member_set.insert(0, ("passed_days", days))
|
member_set.insert(0, ("passed_days", days))
|
||||||
return member_set
|
return member_set
|
||||||
|
|
||||||
# member_set = []
|
|
||||||
# for keyw in server.smembers(redis_progression_name_set):
|
|
||||||
# redis_progression_name = module_name+'-'+keyw
|
|
||||||
# keyw_value = server.hget(date ,redis_progression_name)
|
|
||||||
# keyw_value = keyw_value if keyw_value is not None else 0
|
|
||||||
# member_set.append((keyw, int(keyw_value)))
|
|
||||||
# member_set.sort(key=lambda tup: tup[1], reverse=True)
|
|
||||||
# if member_set[0][1] == 0: #No data for this date
|
|
||||||
# days += 1
|
|
||||||
# continue
|
|
||||||
# else:
|
|
||||||
# member_set.insert(0, ("passed_days", days))
|
|
||||||
# return member_set
|
|
||||||
|
|
||||||
|
|
||||||
def Term_getValueOverRange(word, startDate, num_day):
|
def Term_getValueOverRange(word, startDate, num_day):
|
||||||
passed_days = 0
|
passed_days = 0
|
||||||
|
@ -357,34 +344,6 @@ def providersChart():
|
||||||
return jsonify(member_set)
|
return jsonify(member_set)
|
||||||
|
|
||||||
|
|
||||||
'''
|
|
||||||
# Iterate over element in top_x_set and retreive their value
|
|
||||||
member_set = []
|
|
||||||
for keyw in r_serv_charts.smembers(redis_provider_name_set):
|
|
||||||
redis_provider_name_size = keyw+'_'+'size'
|
|
||||||
redis_provider_name_num = keyw+'_'+'num'
|
|
||||||
keyw_value_size = r_serv_charts.hget(redis_provider_name_size, get_date_range(0)[0])
|
|
||||||
keyw_value_size = keyw_value_size if keyw_value_size is not None else 0.0
|
|
||||||
keyw_value_num = r_serv_charts.hget(redis_provider_name_num, get_date_range(0)[0])
|
|
||||||
|
|
||||||
if keyw_value_num is not None:
|
|
||||||
keyw_value_num = int(keyw_value_num)
|
|
||||||
else:
|
|
||||||
if module_name == "size":
|
|
||||||
keyw_value_num = 10000000000
|
|
||||||
else:
|
|
||||||
keyw_value_num = 0
|
|
||||||
if module_name == "size":
|
|
||||||
member_set.append((keyw, float(keyw_value_size)/float(keyw_value_num)))
|
|
||||||
else:
|
|
||||||
member_set.append((keyw, float(keyw_value_num)))
|
|
||||||
|
|
||||||
member_set.sort(key=lambda tup: tup[1], reverse=True)
|
|
||||||
if len(member_set) == 0:
|
|
||||||
member_set.append(("No relevant data", float(100)))
|
|
||||||
return jsonify(member_set)
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/search", methods=['POST'])
|
@app.route("/search", methods=['POST'])
|
||||||
def search():
|
def search():
|
||||||
|
@ -771,50 +730,6 @@ def terms_plot_top_data():
|
||||||
|
|
||||||
return jsonify(to_return)
|
return jsonify(to_return)
|
||||||
|
|
||||||
'''
|
|
||||||
to_return = []
|
|
||||||
for term in r_serv_term.smembers(the_set):
|
|
||||||
value_range = []
|
|
||||||
tot_sum = 0
|
|
||||||
for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
|
|
||||||
value = r_serv_term.hget(timestamp, term)
|
|
||||||
curr_value_range = int(value) if value is not None else 0
|
|
||||||
tot_sum += curr_value_range
|
|
||||||
value_range.append([timestamp, curr_value_range])
|
|
||||||
|
|
||||||
to_return.append([term, value_range, tot_sum])
|
|
||||||
|
|
||||||
return jsonify(to_return)
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/test/") #completely shows the paste in a new tab
|
|
||||||
def test():
|
|
||||||
|
|
||||||
server = r_serv_term
|
|
||||||
array1 = []
|
|
||||||
for w in server.smembers('TopTermFreq_set_day'):
|
|
||||||
val = server.hget('1471564800', w)
|
|
||||||
val = val if val is not None else 0
|
|
||||||
val2 = server.hget('1471478400', w)
|
|
||||||
val2 = val2 if val2 is not None else 0
|
|
||||||
array1.append((w, (int(val), int(val2))))
|
|
||||||
|
|
||||||
# array2 = []
|
|
||||||
# for w in server.smembers('TopTermFreq_set_week'):
|
|
||||||
# array2.append((w, int(server.hget('1471478400', w))))
|
|
||||||
|
|
||||||
array1.sort(key=lambda tup: tup[1][0])
|
|
||||||
stri = "<h1> day </h1>"
|
|
||||||
for e in array1:
|
|
||||||
stri += "<p>"+ e[0] + "\t" + str(e[1]) +"</p>"
|
|
||||||
# stri += "<h1> week </h1>"
|
|
||||||
# for e in array2:
|
|
||||||
# stri += "<p>"+ e[0] + "\t" + str(e[1]) +"</p>"
|
|
||||||
|
|
||||||
print stri
|
|
||||||
return stri
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/showsavedpaste/") #completely shows the paste in a new tab
|
@app.route("/showsavedpaste/") #completely shows the paste in a new tab
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
* Version: 1.0
|
* Version: 1.0
|
||||||
* Author: Jeff Millies
|
* Author: Jeff Millies
|
||||||
* Author URI:
|
* Author URI:
|
||||||
|
*
|
||||||
|
* Slight modification for better display in Sentiment webpages
|
||||||
*/
|
*/
|
||||||
(function ($) {
|
(function ($) {
|
||||||
var FlexGauge = function (o) {
|
var FlexGauge = function (o) {
|
||||||
|
|
|
@ -115,7 +115,6 @@ $(document).ready(function(){
|
||||||
$('#myTable').on( 'draw.dt', function () {
|
$('#myTable').on( 'draw.dt', function () {
|
||||||
// On click, get html content from url and update the corresponding modal
|
// On click, get html content from url and update the corresponding modal
|
||||||
$("[data-toggle='modal']").unbind('click.openmodal').on("click.openmodal", function (event) {
|
$("[data-toggle='modal']").unbind('click.openmodal').on("click.openmodal", function (event) {
|
||||||
console.log('hi');
|
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
var modal=$(this);
|
var modal=$(this);
|
||||||
var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num');
|
var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num');
|
||||||
|
|
Loading…
Reference in New Issue