mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			162 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			162 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python2
 | |
| # -*-coding:UTF-8 -*
 | |
| """
 | |
|     This module makes statistics for some modules and providers
 | |
| 
 | |
| """
 | |
| 
 | |
| import time
 | |
| import datetime
 | |
| import redis
 | |
| import os
 | |
| from packages import lib_words
 | |
| from packages.Date import Date
 | |
| from pubsublogger import publisher
 | |
| from Helper import Process
 | |
| from packages import Paste
 | |
| 
 | |
| # Config Var
 | |
| max_set_cardinality = 8
 | |
| 
 | |
def get_date_range(num_day):
    """Collect ``Date.substract_day`` results for today and the days before it.

    A ``Date`` is built from today's date formatted as ``YYYYMMDD``; the
    returned list holds ``substract_day(i)`` for every ``i`` from 0 up to and
    including ``num_day``, so it contains ``num_day + 1`` entries with the
    zero-offset entry first.
    """
    today = datetime.date.today()
    # Year as-is, month and day zero-padded to two digits.
    stamp = '{0}{1:02d}{2:02d}'.format(today.year, today.month, today.day)
    date = Date(stamp)
    return [date.substract_day(offset) for offset in range(num_day + 1)]
 | |
| 
 | |
| 
 | |
def compute_most_posted(server, message):
    """Record a keyword count and keep the module's top-keyword set current.

    ``message`` has the form ``module;num;keyword;paste_date``. The field
    ``<module>-<keyword>`` of the hash named ``paste_date`` is incremented by
    ``num``; the keyword's new total is then offered to the sorted set
    ``top_<module>_set_<paste_date>``, which is kept at no more than
    ``max_set_cardinality`` members.

    :param server: Redis client exposing ``hincrby``, ``zcard``, ``zscore``,
        ``zrangebyscore``, ``zrem`` and ``zadd(name, score, member)``.
    :param message: semicolon-separated string with exactly four fields.
    :raises ValueError: if ``message`` does not split into exactly four
        fields, or if ``num`` is not an integer literal.
    """
    module, num, keyword, paste_date = message.split(';')

    redis_progression_name_set = 'top_' + module + '_set_' + paste_date

    # Add/Update in Redis. HINCRBY returns the updated counter, i.e. the
    # keyword's total for paste_date. The ranking set is keyed by paste_date
    # too, so this is the value to rank on; re-reading the counter from
    # *today's* hash produced a wrong (often zero) score whenever paste_date
    # was not the current day.
    keyword_total_sum = int(
        server.hincrby(paste_date, module + '-' + keyword, int(num)))

    # Compute Most Posted
    # If the set still has room, or the keyword is already ranked, a plain
    # ZADD inserts it or refreshes its score without evicting anybody.
    if (server.zcard(redis_progression_name_set) < max_set_cardinality
            or server.zscore(redis_progression_name_set, keyword) is not None):
        server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
        return

    # Set is at full capacity and the keyword is not in it: fetch the member
    # with the lowest score and replace it if the keyword now outranks it.
    member_set = server.zrangebyscore(
        redis_progression_name_set, '-inf', '+inf',
        withscores=True, start=0, num=1)
    # Member set is a list of (value, score) pairs
    lowest_member, lowest_score = member_set[0][0], member_set[0][1]
    if int(lowest_score) < keyword_total_sum:
        # remove min from set and add the new one
        print(module + ': adding ' + keyword + '(' + str(keyword_total_sum) + ') in set and removing ' + lowest_member + '(' + str(lowest_score) + ')')
        server.zrem(redis_progression_name_set, lowest_member)
        server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
        print(redis_progression_name_set)
 | |
| 
 | |
| 
 | |
def compute_provider_info(server_trend, server_pasteName, path):
    """Update the per-provider statistics for the paste stored at ``path``.

    The paste is loaded through ``Paste.Paste(path)``. Its path is added to
    the set named after the paste's base name in ``server_pasteName``. In
    ``server_trend`` the function then:

    * adds the provider to ``all_provider_set``;
    * increments ``<provider>_num`` and ``<provider>_size`` for the paste
      date and stores the resulting average size in ``<provider>_avg``;
    * maintains the sorted sets ``top_size_set_<date>``,
      ``top_avg_size_set_<date>`` and ``providers_set_<date>``, each meant to
      hold at most ``max_set_cardinality`` providers.

    :param server_trend: Redis client holding the trending statistics.
    :param server_pasteName: Redis client mapping paste base names to paths.
    :param path: path of the paste to account for.
    """
    redis_all_provider = 'all_provider_set'

    paste = Paste.Paste(path)

    paste_baseName = paste.p_name.split('.')[0]
    paste_size = paste._get_p_size()
    paste_provider = paste.p_source
    paste_date = str(paste._get_p_date())
    redis_sum_size_set = 'top_size_set_' + paste_date
    redis_avg_size_name_set = 'top_avg_size_set_' + paste_date
    redis_providers_name_set = 'providers_set_' + paste_date

    # Add/Update in Redis
    server_pasteName.sadd(paste_baseName, path)
    server_trend.sadd(redis_all_provider, paste_provider)

    num_paste = int(server_trend.hincrby(paste_provider + '_num', paste_date, 1))
    sum_size = float(server_trend.hincrbyfloat(paste_provider + '_size', paste_date, paste_size))
    new_avg = float(sum_size) / float(num_paste)
    server_trend.hset(paste_provider + '_avg', paste_date, new_avg)

    #
    # Compute Most Posted
    #

    # Size
    # if set not full or provider already present.
    # redis-py returns None (never the string "nil") when a member is absent,
    # so the presence test has to be ``is not None``; comparing against "nil"
    # was always true and the capacity limit was never enforced.
    if (server_trend.zcard(redis_sum_size_set) < max_set_cardinality
            or server_trend.zscore(redis_sum_size_set, paste_provider) is not None):
        # NOTE(review): this branch scores the size set with num_paste while
        # the eviction branch below scores it with sum_size - confirm which
        # one the consumers of 'top_size_set_*' expect.
        server_trend.zadd(redis_sum_size_set, float(num_paste), paste_provider)
        server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)
    else:  # set at full capacity
        member_set = server_trend.zrangebyscore(
            redis_sum_size_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # Member set is a list of (value, score) pairs
        lowest_member, lowest_score = member_set[0][0], member_set[0][1]
        # NOTE(review): the lowest score of the size set is compared with the
        # new *average* size, as in the original code - confirm intent.
        if float(lowest_score) < new_avg:
            # remove min from set and add the new one
            print('Size - adding ' + paste_provider + '(' + str(new_avg) + ') in set and removing ' + lowest_member + '(' + str(lowest_score) + ')')
            server_trend.zrem(redis_sum_size_set, lowest_member)
            server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
            server_trend.zrem(redis_avg_size_name_set, lowest_member)
            server_trend.zadd(redis_avg_size_name_set, float(new_avg), paste_provider)

    # Num
    # if set not full or provider already present
    if (server_trend.zcard(redis_providers_name_set) < max_set_cardinality
            or server_trend.zscore(redis_providers_name_set, paste_provider) is not None):
        server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
    else:  # set at full capacity
        member_set = server_trend.zrangebyscore(
            redis_providers_name_set, '-inf', '+inf', withscores=True, start=0, num=1)
        # Member set is a list of (value, score) pairs
        lowest_member, lowest_score = member_set[0][0], member_set[0][1]
        if int(lowest_score) < num_paste:
            # remove min from set and add the new one
            print('Num - adding ' + paste_provider + '(' + str(num_paste) + ') in set and removing ' + lowest_member + '(' + str(lowest_score) + ')')
            # ZREM needs the sorted-set key as well as the member to remove;
            # the key was missing here.
            server_trend.zrem(redis_providers_name_set, lowest_member)
            server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    # If you wish to use another port or channel, do not forget to run a
    # subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'ModuleStats'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logger. The previous text
    # ("Makes statistics about valid URL") described a different module.
    publisher.info("Makes statistics for some modules and providers")

    # REDIS #
    r_serv_trend = redis.StrictRedis(
        host=p.config.get("Redis_Level_DB_Trending", "host"),
        port=p.config.get("Redis_Level_DB_Trending", "port"),
        db=p.config.get("Redis_Level_DB_Trending", "db"))

    r_serv_pasteName = redis.StrictRedis(
        host=p.config.get("Redis_Paste_Name", "host"),
        port=p.config.get("Redis_Paste_Name", "port"),
        db=p.config.get("Redis_Paste_Name", "db"))

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()

        if message is None:
            # Nothing queued: log it, wait, then poll again.
            publisher.debug("{} queue is empty, waiting".format(config_section))
            print('sleeping')
            time.sleep(20)
            continue

        # A message containing ';' is handed to compute_most_posted (which
        # unpacks 'module;num;keyword;paste_date'); any other message is
        # passed to compute_provider_info as a paste path.
        if ';' in message:
            compute_most_posted(r_serv_trend, message)
        else:
            compute_provider_info(r_serv_trend, r_serv_pasteName, message)
 |