Removed commented code

pull/68/head
Mokaddem 2016-08-22 20:59:56 +02:00
parent 27ae577e00
commit 2fd2c8af39
6 changed files with 16 additions and 220 deletions

View File

@ -1,9 +1,6 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_Sub_Curve Module
============================
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module. This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
This module updates a .csv file used to draw curves representing selected This module updates a .csv file used to draw curves representing selected
@ -15,11 +12,9 @@ words and their occurency per day.
the same Subscriber name in both of them. the same Subscriber name in both of them.
This Module is also used for term frequency.
/!\ Top set management is done in the module Curve_manage_top_set
zrank for each day
week -> top zrank for each day
Requirements Requirements
@ -117,48 +112,21 @@ if __name__ == "__main__":
low_word = word.lower() low_word = word.lower()
#Old curve #Old curve with words in file
r_serv1.hincrby(low_word, date, int(score)) r_serv1.hincrby(low_word, date, int(score))
# Update redis # Update redis
curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score)))
# Add in set only if term is not in the blacklist
if low_word not in server_term.smembers(BlackListTermsSet_Name): if low_word not in server_term.smembers(BlackListTermsSet_Name):
server_term.zincrby(curr_set, low_word, float(score)) server_term.zincrby(curr_set, low_word, float(score))
#Add more info for tracked terms #Add more info for tracked terms
check_if_tracked_term(low_word, filename) check_if_tracked_term(low_word, filename)
# Manage Top set is done in module Curve_manage_top_sets
'''
if server_term.scard(curr_set) < top_term_freq_max_set_cardinality:
server_term.sadd(curr_set, low_word)
elif server_term.sismember(curr_set, low_word):
continue
else:
#timer = time.clock()
curr_word_value = getValueOverRange(low_word, timestamp, curr_num_day)
#print 'curr_range', time.clock() - timer
top_termFreq = server_term.smembers(curr_set)
sorted_top_termFreq_set = []
#timer = time.clock()
for word in top_termFreq:
word_value = getValueOverRange(word, timestamp, curr_num_day)
sorted_top_termFreq_set.append((word, word_value))
sorted_top_termFreq_set.sort(key=lambda tup: tup[1])
#print 'whole_range', time.clock() - timer
if curr_word_value > int(sorted_top_termFreq_set[0][1]):
print str(curr_num_day)+':', low_word, curr_word_value, '\t', sorted_top_termFreq_set[0][0], sorted_top_termFreq_set[0][1], '\t', curr_word_value > sorted_top_termFreq_set[0][1]
server_term.srem(curr_set, sorted_top_termFreq_set[0][0])
server_term.sadd(curr_set, low_word)
'''
else: else:
if generate_new_graph: if generate_new_graph:
generate_new_graph = False generate_new_graph = False
print 'Building graph' print 'Building graph'

View File

@ -2,10 +2,8 @@
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This module manages the top sets for term frequency.
Every 'Refresh_rate' seconds, it updates the weekly and monthly sets.
zrank for each day
week -> top zrank for each day
Requirements Requirements
@ -22,13 +20,13 @@ import time
import copy import copy
from pubsublogger import publisher from pubsublogger import publisher
from packages import lib_words from packages import lib_words
import os
import datetime import datetime
import calendar import calendar
from Helper import Process from Helper import Process
# Config Variables # Config Variables
Refresh_rate = 60*5 #sec
BlackListTermsSet_Name = "BlackListSetTermSet" BlackListTermsSet_Name = "BlackListSetTermSet"
TrackedTermsSet_Name = "TrackedSetTermSet" TrackedTermsSet_Name = "TrackedSetTermSet"
top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set
@ -91,7 +89,6 @@ def manage_top_set():
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger # Port of the redis instance used by pubsublogger
@ -107,7 +104,6 @@ if __name__ == '__main__':
port=p.config.get("Redis_Level_DB_TermFreq", "port"), port=p.config.get("Redis_Level_DB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db")) db=p.config.get("Redis_Level_DB_TermFreq", "db"))
# FUNCTIONS #
publisher.info("Script Curve_manage_top_set started") publisher.info("Script Curve_manage_top_set started")
# Sent to the logging a description of the module # Sent to the logging a description of the module
@ -116,15 +112,12 @@ if __name__ == '__main__':
manage_top_set() manage_top_set()
while True: while True:
# Get one message from the input queue # Get one message from the input queue (module only works if linked with a queue)
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping' print 'sleeping'
time.sleep(60) # sleep a long time then manage the set time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set() manage_top_set()
continue continue
# Do something with the message from the queue
#manage_top_set()

View File

@ -1,7 +1,8 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
Template for new modules This module makes statistics for some modules and providers
""" """
import time import time
@ -56,29 +57,6 @@ def compute_most_posted(server, message):
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword) server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
print redis_progression_name_set print redis_progression_name_set
# if keyword in server.smembers(redis_progression_name_set): # if it is already in the set
# return
#
# if (server.scard(redis_progression_name_set) < max_set_cardinality):
# server.sadd(redis_progression_name_set, keyword)
# else: #not in the set
# #Check value for all members
# member_set = []
# for keyw in server.smembers(redis_progression_name_set):
# keyw_value = server.hget(paste_date, module+'-'+keyw)
# if keyw_value is not None:
# member_set.append((keyw, int(keyw_value)))
# else: #No data for this set for today
# member_set.append((keyw, int(0)))
# member_set.sort(key=lambda tup: tup[1])
# if len(member_set) > 0:
# if member_set[0][1] < keyword_total_sum:
# #remove min from set and add the new one
# print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
# server.srem(redis_progression_name_set, member_set[0][0])
# server.sadd(redis_progression_name_set, keyword)
def compute_provider_info(server, path): def compute_provider_info(server, path):
redis_all_provider = 'all_provider_set' redis_all_provider = 'all_provider_set'
@ -100,22 +78,6 @@ def compute_provider_info(server, path):
new_avg = float(sum_size) / float(num_paste) new_avg = float(sum_size) / float(num_paste)
server.hset(paste_provider +'_avg', paste_date, new_avg) server.hset(paste_provider +'_avg', paste_date, new_avg)
'''
prev_num_paste = server.hget(paste_provider+'_num', paste_date)
if prev_num_paste is not None:
ok = server.hset(paste_provider+'_num', paste_date, int(prev_num_paste)+1)
prev_sum_size = server.hget(paste_provider+'_size', paste_date)
if prev_sum_size is not None:
ok = server.hset(paste_provider+'_size', paste_date, float(prev_sum_size)+paste_size)
new_avg = (float(prev_sum_size)+paste_size) / (int(prev_num_paste)+1)
else:
ok = server.hset(paste_provider+'_size', paste_date, paste_size)
else:
ok = server.hset(paste_provider+'_num', paste_date, 1)
prev_num_paste = 0
'''
# #
# Compute Most Posted # Compute Most Posted
@ -136,28 +98,6 @@ def compute_provider_info(server, path):
server.zrem(redis_avg_size_name_set, member_set[0][0]) server.zrem(redis_avg_size_name_set, member_set[0][0])
server.zadd(redis_avg_size_name_set, float(new_avg), paste_provider) server.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
'''
if paste_provider not in server.smembers(redis_avg_size_name_set): # if it is already in the set
if (server.scard(redis_avg_size_name_set) < max_set_cardinality):
server.sadd(redis_avg_size_name_set, paste_provider)
else: #set full capacity
#Check value for all members
member_set = []
for provider in server.smembers(redis_avg_size_name_set):
curr_avg = 0.0
curr_size = server.hget(provider+'_size', paste_date)
curr_num = server.hget(provider+'_num', paste_date)
if (curr_size is not None) and (curr_num is not None):
curr_avg = float(curr_size) / float(curr_num)
member_set.append((provider, curr_avg))
member_set.sort(key=lambda tup: tup[1])
if member_set[0][1] < new_avg:
#remove min from set and add the new one
print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
server.srem(redis_avg_size_name_set, member_set[0][0])
server.sadd(redis_avg_size_name_set, paste_provider)
'''
# Num # Num
# if set not full or provider already present # if set not full or provider already present
@ -172,27 +112,6 @@ def compute_provider_info(server, path):
server.zrem(member_set[0][0]) server.zrem(member_set[0][0])
server.zadd(redis_providers_name_set, float(num_paste), paste_provider) server.zadd(redis_providers_name_set, float(num_paste), paste_provider)
'''
if paste_provider not in server.smembers(redis_providers_name_set): # if it is already in the set
if (server.scard(redis_providers_name_set) < max_set_cardinality):
server.sadd(redis_providers_name_set, paste_provider)
else: #set full capacity
#Check value for all members
member_set = []
for provider in server.smembers(redis_providers_name_set):
curr_num = 0
curr_num = server.hget(provider+'_num', paste_date)
if curr_num is not None:
member_set.append((provider, int(curr_num)))
member_set.sort(key=lambda tup: tup[1])
if len(member_set) > 0:
if member_set[0][1] < int(prev_num_paste)+1:
#remove min from set and add the new one
print 'Num - adding ' +paste_provider+ '(' +str(int(prev_num_paste)+1)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
server.srem(redis_providers_name_set, member_set[0][0])
server.sadd(redis_providers_name_set, paste_provider)
'''
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)

View File

@ -173,6 +173,7 @@ def showpaste(content_range):
if content_range != 0: if content_range != 0:
p_content = p_content[0:content_range] p_content = p_content[0:content_range]
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list) return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list)
def getPastebyType(server, module_name): def getPastebyType(server, module_name):
@ -205,20 +206,6 @@ def get_top_relevant_data(server, module_name):
member_set.insert(0, ("passed_days", days)) member_set.insert(0, ("passed_days", days))
return member_set return member_set
# member_set = []
# for keyw in server.smembers(redis_progression_name_set):
# redis_progression_name = module_name+'-'+keyw
# keyw_value = server.hget(date ,redis_progression_name)
# keyw_value = keyw_value if keyw_value is not None else 0
# member_set.append((keyw, int(keyw_value)))
# member_set.sort(key=lambda tup: tup[1], reverse=True)
# if member_set[0][1] == 0: #No data for this date
# days += 1
# continue
# else:
# member_set.insert(0, ("passed_days", days))
# return member_set
def Term_getValueOverRange(word, startDate, num_day): def Term_getValueOverRange(word, startDate, num_day):
passed_days = 0 passed_days = 0
@ -357,34 +344,6 @@ def providersChart():
return jsonify(member_set) return jsonify(member_set)
'''
# Iterate over element in top_x_set and retreive their value
member_set = []
for keyw in r_serv_charts.smembers(redis_provider_name_set):
redis_provider_name_size = keyw+'_'+'size'
redis_provider_name_num = keyw+'_'+'num'
keyw_value_size = r_serv_charts.hget(redis_provider_name_size, get_date_range(0)[0])
keyw_value_size = keyw_value_size if keyw_value_size is not None else 0.0
keyw_value_num = r_serv_charts.hget(redis_provider_name_num, get_date_range(0)[0])
if keyw_value_num is not None:
keyw_value_num = int(keyw_value_num)
else:
if module_name == "size":
keyw_value_num = 10000000000
else:
keyw_value_num = 0
if module_name == "size":
member_set.append((keyw, float(keyw_value_size)/float(keyw_value_num)))
else:
member_set.append((keyw, float(keyw_value_num)))
member_set.sort(key=lambda tup: tup[1], reverse=True)
if len(member_set) == 0:
member_set.append(("No relevant data", float(100)))
return jsonify(member_set)
'''
@app.route("/search", methods=['POST']) @app.route("/search", methods=['POST'])
def search(): def search():
@ -771,50 +730,6 @@ def terms_plot_top_data():
return jsonify(to_return) return jsonify(to_return)
'''
to_return = []
for term in r_serv_term.smembers(the_set):
value_range = []
tot_sum = 0
for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay):
value = r_serv_term.hget(timestamp, term)
curr_value_range = int(value) if value is not None else 0
tot_sum += curr_value_range
value_range.append([timestamp, curr_value_range])
to_return.append([term, value_range, tot_sum])
return jsonify(to_return)
'''
@app.route("/test/")  # debug view: dumps the daily top-term set as raw HTML
def test():
    """Render the members of the daily top-term set as a bare HTML fragment.

    For every member of the ``TopTermFreq_set_day`` set, the term's counters
    are read from the two hard-coded hashes ``'1471564800'`` and
    ``'1471478400'``; a missing counter counts as 0. Terms are sorted by the
    first counter in ascending order and written out as one ``<p>`` element
    per term, followed by the ``(first, second)`` counter pair.

    Returns:
        The generated HTML string (it is also printed to stdout).
    """
    # Function-scope import so the block stays self-contained on both
    # interpreters: ``html.escape`` on Python 3, ``cgi.escape`` on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    server = r_serv_term
    array1 = []
    for w in server.smembers('TopTermFreq_set_day'):
        val = server.hget('1471564800', w)
        val = val if val is not None else 0
        val2 = server.hget('1471478400', w)
        val2 = val2 if val2 is not None else 0
        array1.append((w, (int(val), int(val2))))

    # Ascending order on the counter read from the first hash.
    array1.sort(key=lambda tup: tup[1][0])

    stri = "<h1> day </h1>"
    for e in array1:
        # Terms are stored data, not trusted markup: escape them before
        # embedding in HTML. The counters are ints, so their repr is safe.
        stri += "<p>" + escape(e[0]) + "\t" + str(e[1]) + "</p>"

    print(stri)
    return stri
@app.route("/showsavedpaste/") #completely shows the paste in a new tab @app.route("/showsavedpaste/") #completely shows the paste in a new tab

View File

@ -3,6 +3,8 @@
* Version: 1.0 * Version: 1.0
* Author: Jeff Millies * Author: Jeff Millies
* Author URI: * Author URI:
*
* Slight modification for better display in Sentiment webpages
*/ */
(function ($) { (function ($) {
var FlexGauge = function (o) { var FlexGauge = function (o) {

View File

@ -115,7 +115,6 @@ $(document).ready(function(){
$('#myTable').on( 'draw.dt', function () { $('#myTable').on( 'draw.dt', function () {
// On click, get html content from url and update the corresponding modal // On click, get html content from url and update the corresponding modal
$("[data-toggle='modal']").unbind('click.openmodal').on("click.openmodal", function (event) { $("[data-toggle='modal']").unbind('click.openmodal').on("click.openmodal", function (event) {
console.log('hi');
event.preventDefault(); event.preventDefault();
var modal=$(this); var modal=$(this);
var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num'); var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num');