#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
This module makes statistics for some modules and providers
"""

import time
import datetime
import redis
import os

from packages.Date import Date
from pubsublogger import publisher
from Helper import Process
from packages import Paste

# Config Var
max_set_cardinality = 8


def get_date_range(num_day):
    curr_date = datetime.date.today()
    date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
    date_list = []

    for i in range(0, num_day+1):
        date_list.append(date.substract_day(i))
    return date_list


def compute_most_posted(server, message):
    # A keyword-statistics message has the form "module;num;keyword;date"
    module, num, keyword, paste_date = message.split(';')

    redis_progression_name_set = 'top_'+ module +'_set_' + paste_date

    # Add/Update in Redis
    server.hincrby(paste_date, module+'-'+keyword, int(num))

    # Compute Most Posted
    date = get_date_range(0)[0]

    # check if this keyword is eligible for progression
    keyword_total_sum = 0
    curr_value = server.hget(date, module+'-'+keyword)
    keyword_total_sum += int(curr_value) if curr_value is not None else 0

    if server.zcard(redis_progression_name_set) < max_set_cardinality:
        server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
    else:  # set at full capacity
        member_set = server.zrangebyscore(redis_progression_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # Member set is a list of (value, score) pairs
        if int(member_set[0][1]) < keyword_total_sum:
            # remove min from set and add the new one
            print(module + ': adding ' + keyword + '(' + str(keyword_total_sum) + ') in set and removing ' + member_set[0][0] + '(' + str(member_set[0][1]) + ')')
            server.zrem(redis_progression_name_set, member_set[0][0])
            server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
            print(redis_progression_name_set)


def compute_provider_info(server_trend, server_pasteName, path):
    redis_all_provider = 'all_provider_set'

    paste = Paste.Paste(path)

    paste_baseName = paste.p_name.split('.')[0]
    paste_size = paste._get_p_size()
    paste_provider = paste.p_source
    paste_date = str(paste._get_p_date())

    redis_sum_size_set = 'top_size_set_' + paste_date
    redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
    redis_providers_name_set = 'providers_set_' + paste_date

    # Add/Update in Redis
    server_pasteName.sadd(paste_baseName, path)
    server_trend.sadd(redis_all_provider, paste_provider)

    num_paste = int(server_trend.hincrby(paste_provider+'_num', paste_date, 1))
    sum_size = float(server_trend.hincrbyfloat(paste_provider+'_size', paste_date, paste_size))
    new_avg = float(sum_size) / float(num_paste)
    server_trend.hset(paste_provider +'_avg', paste_date, new_avg)

    #
    # Compute Most Posted
    #

    # Size
    # if set not full or provider already present
    if server_trend.zcard(redis_sum_size_set) < max_set_cardinality or server_trend.zscore(redis_sum_size_set, paste_provider) is not None:
        server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
        server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
    else:  # set at full capacity
        member_set = server_trend.zrangebyscore(redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # Member set is a list of (value, score) pairs
        if float(member_set[0][1]) < sum_size:
            # remove min from set and add the new one
            print('Size - adding ' + paste_provider + '(' + str(sum_size) + ') in set and removing ' + member_set[0][0] + '(' + str(member_set[0][1]) + ')')
            server_trend.zrem(redis_sum_size_set, member_set[0][0])
            server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
            server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
            server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)

    # Num
    # if set not full or provider already present
    if server_trend.zcard(redis_providers_name_set) < max_set_cardinality or server_trend.zscore(redis_providers_name_set, paste_provider) is not None:
        server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
    else:  # set at full capacity
        member_set = server_trend.zrangebyscore(redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # Member set is a list of (value, score) pairs
        if int(member_set[0][1]) < num_paste:
            # remove min from set and add the new one
            print('Num - adding ' + paste_provider + '(' + str(num_paste) + ') in set and removing ' + member_set[0][0] + '(' + str(member_set[0][1]) + ')')
            server_trend.zrem(redis_providers_name_set, member_set[0][0])
            server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)


if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)

    # Port of the redis instance used by pubsublogger
    publisher.port = 6380

    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'ModuleStats'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logs
    publisher.info("Makes statistics for some modules and providers")

    # REDIS #
    r_serv_trend = redis.StrictRedis(
        host=p.config.get("ARDB_Trending", "host"),
        port=p.config.get("ARDB_Trending", "port"),
        db=p.config.get("ARDB_Trending", "db"),
        decode_responses=True)

    r_serv_pasteName = redis.StrictRedis(
        host=p.config.get("Redis_Paste_Name", "host"),
        port=p.config.get("Redis_Paste_Name", "port"),
        db=p.config.get("Redis_Paste_Name", "db"),
        decode_responses=True)

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            print('sleeping')
            time.sleep(20)
            continue
        else:
            # Do something with the message from the queue:
            # messages containing ';' carry keyword statistics ("module;num;keyword;date"),
            # otherwise the message is the path of a paste.
            if len(message.split(';')) > 1:
                compute_most_posted(r_serv_trend, message)
            else:
                compute_provider_info(r_serv_trend, r_serv_pasteName, message)