2018-05-04 13:53:29 +02:00
|
|
|
#!/usr/bin/env python3
|
2016-07-25 16:38:57 +02:00
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
2016-08-22 20:59:56 +02:00
|
|
|
This module makes statistics for some modules and providers
|
|
|
|
|
2016-07-25 16:38:57 +02:00
|
|
|
"""
|
|
|
|
|
|
|
|
import time
|
|
|
|
import datetime
|
|
|
|
import redis
|
|
|
|
import os
|
|
|
|
from packages import lib_words
|
|
|
|
from packages.Date import Date
|
|
|
|
from pubsublogger import publisher
|
|
|
|
from Helper import Process
|
|
|
|
from packages import Paste
|
|
|
|
|
|
|
|
# Config Var
|
2016-08-11 17:57:23 +02:00
|
|
|
max_set_cardinality = 8
|
2016-07-25 16:38:57 +02:00
|
|
|
|
|
|
|
def get_date_range(num_day):
    """Return a list of Date strings (YYYYMMDD) from today going back
    ``num_day`` days, most recent first (num_day + 1 entries in total)."""
    today = datetime.date.today()
    start = Date('{}{:02d}{:02d}'.format(today.year, today.month, today.day))
    return [start.substract_day(offset) for offset in range(num_day + 1)]
|
|
|
|
|
|
|
|
|
2016-07-29 16:52:50 +02:00
|
|
|
def compute_most_posted(server, message):
    """Record one (module, keyword) occurrence and maintain the bounded
    'top_<module>_set_<date>' sorted set of most-posted keywords.

    server  -- redis connection holding the trending statistics
    message -- string of the form 'module;num;keyword;paste_date'
    """
    module, num, keyword, paste_date = message.split(';')

    top_set_key = 'top_'+ module +'_set_' + paste_date

    # Add/Update in Redis: bump the per-day counter for this pair.
    server.hincrby(paste_date, module+'-'+keyword, int(num))

    # Compute Most Posted — read back today's total for this pair.
    today = get_date_range(0)[0]

    # check if this keyword is eligible for progression
    keyword_total_sum = 0
    stored = server.hget(today, module+'-'+keyword)
    if stored is not None:
        keyword_total_sum += int(stored)

    if server.zcard(top_set_key) < max_set_cardinality:
        # Set is not full yet: insert unconditionally.
        server.zadd(top_set_key, float(keyword_total_sum), keyword)
        return

    # Set at capacity: fetch the current minimum-scored member.
    lowest = server.zrangebyscore(top_set_key, '-inf', '+inf',
                                  withscores=True, start=0, num=1)
    # lowest is a list of (member, score) pairs
    if int(lowest[0][1]) < keyword_total_sum:
        # remove min from set and add the new one
        print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+lowest[0][0]+'('+str(lowest[0][1])+')')
        server.zrem(top_set_key, lowest[0][0])
        server.zadd(top_set_key, float(keyword_total_sum), keyword)
        print(top_set_key)
|
2016-08-18 15:34:19 +02:00
|
|
|
|
2016-07-25 16:38:57 +02:00
|
|
|
|
2016-08-23 08:52:54 +02:00
|
|
|
def compute_provider_info(server_trend, server_pasteName, path):
    """Update per-provider statistics (paste count, cumulative size, average
    size) for the paste at ``path`` and maintain the bounded daily 'top'
    sorted sets.

    server_trend     -- redis connection holding the trending statistics
    server_pasteName -- redis connection mapping paste base names to paths
    path             -- path of the paste to process (passed to Paste.Paste)
    """
    redis_all_provider = 'all_provider_set'

    paste = Paste.Paste(path)

    paste_baseName = paste.p_name.split('.')[0]
    paste_size = paste._get_p_size()
    paste_provider = paste.p_source
    paste_date = str(paste._get_p_date())
    redis_sum_size_set = 'top_size_set_' + paste_date
    redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
    redis_providers_name_set = 'providers_set_' + paste_date

    # Add/Update in Redis
    server_pasteName.sadd(paste_baseName, path)
    server_trend.sadd(redis_all_provider, paste_provider)

    num_paste = int(server_trend.hincrby(paste_provider+'_num', paste_date, 1))
    sum_size = float(server_trend.hincrbyfloat(paste_provider+'_size', paste_date, paste_size))
    new_avg = float(sum_size) / float(num_paste)
    server_trend.hset(paste_provider +'_avg', paste_date, new_avg)

    #
    # Compute Most Posted
    #

    # Size
    # if set not full or provider already present
    # BUGFIX: zscore() returns a float or None in redis-py, never the string
    # "nil" — the old `!= "nil"` test was always true, so the eviction branch
    # below was unreachable.
    if server_trend.zcard(redis_sum_size_set) < max_set_cardinality or server_trend.zscore(redis_sum_size_set, paste_provider) is not None:
        # BUGFIX: store the cumulative size (sum_size) in the size set, as the
        # eviction branch below does, instead of the paste count (num_paste).
        server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
        server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
    else: #set full capacity
        member_set = server_trend.zrangebyscore(redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # Member set is a list of (value, score) pairs
        if float(member_set[0][1]) < new_avg:
            #remove min from set and add the new one
            print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
            server_trend.zrem(redis_sum_size_set, member_set[0][0])
            server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
            server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
            server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)

    # Num
    # if set not full or provider already present
    # BUGFIX: same `!= "nil"` issue as above — use `is not None`.
    if server_trend.zcard(redis_providers_name_set) < max_set_cardinality or server_trend.zscore(redis_providers_name_set, paste_provider) is not None:
        server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
    else: #set at full capacity
        member_set = server_trend.zrangebyscore(redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # Member set is a list of (value, score) pairs
        if int(member_set[0][1]) < num_paste:
            #remove min from set and add the new one
            print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
            # BUGFIX: zrem was called without the key argument.
            server_trend.zrem(redis_providers_name_set, member_set[0][0])
            server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
|
2016-08-18 15:34:19 +02:00
|
|
|
|
2016-07-25 16:38:57 +02:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'ModuleStats'

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Makes statistics about valid URL")

    # REDIS #
    # Trending statistics store (ARDB backend per the config section name).
    r_serv_trend = redis.StrictRedis(
        host=p.config.get("ARDB_Trending", "host"),
        port=p.config.get("ARDB_Trending", "port"),
        db=p.config.get("ARDB_Trending", "db"),
        decode_responses=True)

    # Paste-name -> path mapping store.
    r_serv_pasteName = redis.StrictRedis(
        host=p.config.get("Redis_Paste_Name", "host"),
        port=p.config.get("Redis_Paste_Name", "port"),
        db=p.config.get("Redis_Paste_Name", "db"),
        decode_responses=True)

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            # Queue drained: back off before polling again.
            publisher.debug("{} queue is empty, waiting".format(config_section))
            print('sleeping')
            time.sleep(20)
            continue

        else:
            # Do something with the message from the queue
            # Semicolon-delimited messages ('module;num;keyword;date') feed
            # the most-posted stats; a bare paste path feeds provider stats.
            if len(message.split(';')) > 1:
                compute_most_posted(r_serv_trend, message)
            else:
                compute_provider_info(r_serv_trend, r_serv_pasteName, message)
|