mirror of https://github.com/CIRCL/AIL-framework
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
    This module computes statistics for some modules and for paste providers.
"""

import time
import datetime
import redis
import os

from packages import lib_words
from packages.Date import Date
from pubsublogger import publisher
from Helper import Process
from packages import Paste

# Config Var
max_set_cardinality = 8
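
# All ranking code in this module maintains bounded "top N" sorted sets with the
# same pattern: while a day's set holds fewer than max_set_cardinality members,
# new entries are added directly; once the cap is reached, the lowest-scored
# member is evicted only when the new score beats it. The helper below is an
# illustrative sketch of that pattern; it is not called by this module, which
# inlines the logic in the functions further down.
def _update_capped_top_set(server, key, member, score, cap=max_set_cardinality):
    if server.zcard(key) < cap:
        server.zadd(key, float(score), member)
        return
    # lowest holds at most one (member, score) pair
    lowest = server.zrangebyscore(key, '-inf', '+inf', withscores=True, start=0, num=1)
    if lowest and float(lowest[0][1]) < float(score):
        server.zrem(key, lowest[0][0])
        server.zadd(key, float(score), member)
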

def get_date_range(num_day):
    """Return the current date and the num_day previous dates as YYYYMMDD strings, most recent first."""
    curr_date = datetime.date.today()
    date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
    date_list = []

    for i in range(0, num_day+1):
        date_list.append(date.substract_day(i))
    return date_list

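# Illustrative result (dates are made up): run on 2024-01-03, get_date_range(2)
# would return ['20240103', '20240102', '20240101'].
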

def compute_most_posted(server, message):
    """Count keyword hits per module and day, and maintain the daily top-keyword sorted set."""
    module, num, keyword, paste_date = message.split(';')

    redis_progression_name_set = 'top_' + module + '_set_' + paste_date

    # Add/Update in Redis
    server.hincrby(paste_date, module+'-'+keyword, int(num))

    # Compute Most Posted
    date = get_date_range(0)[0]
    # check if this keyword is eligible for progression
    keyword_total_sum = 0

    curr_value = server.hget(date, module+'-'+keyword)
    keyword_total_sum += int(curr_value) if curr_value is not None else 0

    if server.zcard(redis_progression_name_set) < max_set_cardinality:
        server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)

    else:  # set at full capacity
        member_set = server.zrangebyscore(redis_progression_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # member_set is a list of (member, score) pairs
        if int(member_set[0][1]) < keyword_total_sum:
            # remove the current minimum from the set and add the new keyword
            print(module + ': adding ' + keyword + '(' + str(keyword_total_sum) + ') in set and removing ' + member_set[0][0] + '(' + str(member_set[0][1]) + ')')
            server.zrem(redis_progression_name_set, member_set[0][0])
            server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
            print(redis_progression_name_set)

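# Illustrative call (hypothetical values, following the 'module;num;keyword;date'
# layout parsed above): compute_most_posted(r_serv_trend, 'Credential;3;gmail.com;20240103')
# would increment the field 'Credential-gmail.com' of the hash '20240103' by 3 and
# refresh that keyword's score in the 'top_Credential_set_20240103' sorted set.
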

def compute_provider_info(server_trend, server_pasteName, path):
    """Update per-provider paste counters and sizes, and maintain the daily top-provider sorted sets."""
    redis_all_provider = 'all_provider_set'

    paste = Paste.Paste(path)

    paste_baseName = paste.p_name.split('.')[0]
    paste_size = paste._get_p_size()
    paste_provider = paste.p_source
    paste_date = str(paste._get_p_date())
    redis_sum_size_set = 'top_size_set_' + paste_date
    redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
    redis_providers_name_set = 'providers_set_' + paste_date

    # Add/Update in Redis
    server_pasteName.sadd(paste_baseName, path)
    server_trend.sadd(redis_all_provider, paste_provider)

    num_paste = int(server_trend.hincrby(paste_provider+'_num', paste_date, 1))
    sum_size = float(server_trend.hincrbyfloat(paste_provider+'_size', paste_date, paste_size))
    new_avg = float(sum_size) / float(num_paste)
    server_trend.hset(paste_provider+'_avg', paste_date, new_avg)

    #
    # Compute Most Posted
    #

    # Size
    # if the set is not full or the provider is already present
    if server_trend.zcard(redis_sum_size_set) < max_set_cardinality or server_trend.zscore(redis_sum_size_set, paste_provider) is not None:
        server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
        server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
    else:  # set at full capacity
        member_set = server_trend.zrangebyscore(redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # member_set is a list of (member, score) pairs
        if float(member_set[0][1]) < new_avg:
            # remove the current minimum from both sets and add the new provider
            print('Size - adding ' + paste_provider + '(' + str(new_avg) + ') in set and removing ' + member_set[0][0] + '(' + str(member_set[0][1]) + ')')
            server_trend.zrem(redis_sum_size_set, member_set[0][0])
            server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
            server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
            server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)

    # Num
    # if the set is not full or the provider is already present
    if server_trend.zcard(redis_providers_name_set) < max_set_cardinality or server_trend.zscore(redis_providers_name_set, paste_provider) is not None:
        server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
    else:  # set at full capacity
        member_set = server_trend.zrangebyscore(redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # member_set is a list of (member, score) pairs
        if int(member_set[0][1]) < num_paste:
            # remove the current minimum from the set and add the new provider
            print('Num - adding ' + paste_provider + '(' + str(num_paste) + ') in set and removing ' + member_set[0][0] + '(' + str(member_set[0][1]) + ')')
            server_trend.zrem(redis_providers_name_set, member_set[0][0])
            server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)

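# For reference, derived from the function above: per provider this module keeps
# the hashes '<provider>_num', '<provider>_size' and '<provider>_avg' (one field
# per YYYYMMDD date), plus the daily sorted sets 'top_size_set_<date>',
# 'top_avg_size_set_<date>' and 'providers_set_<date>', each capped at
# max_set_cardinality members.
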

if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'ModuleStats'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logging system
    publisher.info("Makes statistics about modules and providers")

    # REDIS #
    r_serv_trend = redis.StrictRedis(
        host=p.config.get("ARDB_Trending", "host"),
        port=p.config.get("ARDB_Trending", "port"),
        db=p.config.get("ARDB_Trending", "db"),
        decode_responses=True)

    r_serv_pasteName = redis.StrictRedis(
        host=p.config.get("Redis_Paste_Name", "host"),
        port=p.config.get("Redis_Paste_Name", "port"),
        db=p.config.get("Redis_Paste_Name", "db"),
        decode_responses=True)

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            print('sleeping')
            time.sleep(20)
            continue

        else:
            # Dispatch on the message shape: 'module;num;keyword;date' statistics
            # messages go to compute_most_posted(), anything else is treated as a
            # path to a paste and handed to compute_provider_info()
            if len(message.split(';')) > 1:
                compute_most_posted(r_serv_trend, message)
            else:
                compute_provider_info(r_serv_trend, r_serv_pasteName, message)