mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			137 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			137 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # -*-coding:UTF-8 -*
 | |
| 
 | |
| import datetime
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| sys.path.append(os.environ['AIL_BIN'])
 | |
| ##################################
 | |
| # Import Project packages
 | |
| ##################################
 | |
| from lib import ConfigLoader
 | |
| 
 | |
| config_loader = ConfigLoader.ConfigLoader()
 | |
| r_statistics = config_loader.get_db_conn("Kvrocks_Stats")
 | |
| config_loader = None
 | |
| 
 | |
| PIE_CHART_MAX_CARDINALITY = 8
 | |
| 
 | |
| def incr_module_timeout_statistic(module_name):
 | |
|     curr_date = datetime.date.today()
 | |
|     r_statistics.hincrby(curr_date.strftime("%Y%m%d"), 'paste_by_modules_timeout:{}'.format(module_name), 1)
 | |
| 
 | |
| def create_item_statistics(item_id, source, size):
 | |
|     pass
 | |
| 
 | |
| def get_item_sources():
 | |
|     return r_statistics.smembers('all_provider_set')
 | |
| 
 | |
| def get_nb_items_processed_by_day_and_source(date, source):
 | |
|     nb_items = r_statistics.hget(f'{source}_num', date)
 | |
|     if not nb_items:
 | |
|         nb_items = 0
 | |
|     return int(nb_items)
 | |
| 
 | |
| def get_items_total_size_by_day_and_source(date, source):
 | |
|     total_size = r_statistics.hget(f'{source}_size', date)
 | |
|     if not total_size:
 | |
|         total_size = 0
 | |
|     return float(total_size)
 | |
| 
 | |
| def get_items_av_size_by_day_and_source(date, source):
 | |
|     av_size = r_statistics.hget(f'{source}_avg', date)
 | |
|     if not av_size:
 | |
|         av_size = 0
 | |
|     return float(av_size)
 | |
| 
 | |
| def _create_item_stats_size_nb(date, source, num, size, avg):
 | |
|     r_statistics.hset(f'{source}_num', date, num)
 | |
|     r_statistics.hset(f'{source}_size', date, size)
 | |
|     r_statistics.hset(f'{source}_avg', date, avg)
 | |
| 
 | |
| def get_item_stats_size_avg_by_date():
 | |
|     return r_statistics.zrange(f'top_avg_size_set_{date}', 0, -1, withscores=True)
 | |
| 
 | |
| def get_item_stats_nb_by_date():
 | |
|     return r_statistics.zrange(f'providers_set_{date}', 0, -1, withscores=True)
 | |
| 
 | |
| def _set_item_stats_nb_by_date(date, source):
 | |
|     return r_statistics.zrange(f'providers_set_{date}', )
 | |
| 
 | |
| # # TODO: load ZSET IN CACHE => FAST UPDATE
 | |
| def update_item_stats_size_nb(item_id, source, size, date):
 | |
|     # Add/Update in Redis
 | |
|     r_statistics.sadd('all_provider_set', source)
 | |
| 
 | |
|     nb_items = int(r_statistics.hincrby(f'{source}_num', date, 1))
 | |
|     sum_size = float(r_statistics.hincrbyfloat(f'{source}_size', date, size))
 | |
|     new_avg = sum_size / nb_items
 | |
|     r_statistics.hset(f'{source}_avg', date, new_avg)
 | |
| 
 | |
|     # TOP Items Size
 | |
|     if r_statistics.zcard(f'top_size_set_{date}') < PIE_CHART_MAX_CARDINALITY:
 | |
|         r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg})
 | |
| 
 | |
|     else:
 | |
|         member_set = r_statistics.zrangebyscore(f'top_avg_size_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
 | |
|         # Member set is a list of (value, score) pairs
 | |
|         if float(member_set[0][1]) < new_avg:
 | |
|             # remove min from set and add the new one
 | |
|             r_statistics.zrem(f'top_avg_size_set_{date}', member_set[0][0])
 | |
|             r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg})
 | |
| 
 | |
|     # TOP Nb Items
 | |
|     if r_statistics.zcard(f'providers_set_{date}') < PIE_CHART_MAX_CARDINALITY or r_statistics.zscore(f'providers_set_{date}', source) != None:
 | |
|         r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)})
 | |
|     else: # zset at full capacity
 | |
|         member_set = r_statistics.zrangebyscore(f'providers_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
 | |
|         # Member set is a list of (value, score) pairs
 | |
|         if int(member_set[0][1]) < nb_items:
 | |
|             # remove min from set and add the new one
 | |
|             r_statistics.zrem(member_set[0][0])
 | |
|             r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)})
 | |
| 
 | |
| # keyword  num
 | |
| 
 | |
| def _add_module_stats(module_name, total_sum, keyword, date):
 | |
|     r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(total_sum)})
 | |
| 
 | |
| # # TODO: ONE HSET BY MODULE / CUSTOM STATS
 | |
| def update_module_stats(module_name, num, keyword, date):
 | |
| 
 | |
|     # Add/Update in Redis
 | |
|     r_statistics.hincrby(date, f'{module_name}-{keyword}', int(num)) # # TODO: RENAME ME !!!!!!!!!!!!!!!!!!!!!!!!!
 | |
| 
 | |
|     # Compute Most Posted
 | |
|     # check if this keyword is eligible for progression
 | |
|     keyword_total_sum = 0
 | |
| 
 | |
|     curr_value = r_statistics.hget(date, f'{module_name}-{keyword}')
 | |
|     keyword_total_sum += int(curr_value) if curr_value is not None else 0
 | |
| 
 | |
|     if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY:
 | |
|         r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)})
 | |
|     else:  # zset at full capacity
 | |
|         member_set = r_statistics.zrangebyscore(f'top_{module_name}_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
 | |
|         # Member set is a list of (value, score) pairs
 | |
|         if int(member_set[0][1]) < keyword_total_sum:
 | |
|             # remove min from set and add the new one
 | |
|             r_statistics.zrem(f'top_{module_name}_set_{date}', member_set[0][0])
 | |
|             r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)})
 | |
| 
 | |
| def get_module_tld_stats_by_tld_date(date, tld):
 | |
|     nb_tld = r_statistics.hget(f'credential_by_tld:{date}', tld)
 | |
|     if not nb_tld:
 | |
|         nb_tld = 0
 | |
|     return int(nb_tld)
 | |
| 
 | |
| def get_module_tld_stats_by_date(module, date):
 | |
|     return r_statistics.hgetall(f'{module}_by_tld:{date}')
 | |
| 
 | |
| def add_module_tld_stats_by_date(module, date, tld, nb):
 | |
|     r_statistics.hincrby(f'{module}_by_tld:{date}', tld, int(nb))
 | |
| 
 | |
| # r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
 | |
| # r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
 |