AIL-framework/bin/lib/Statistics.py

137 lines
5.4 KiB
Python
Raw Normal View History

2020-05-20 17:03:58 +02:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import datetime
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
2020-05-20 17:03:58 +02:00
config_loader = ConfigLoader.ConfigLoader()
r_statistics = config_loader.get_db_conn("Kvrocks_Stats")
2020-05-20 17:03:58 +02:00
config_loader = None
2022-09-08 10:31:57 +02:00
PIE_CHART_MAX_CARDINALITY = 8
2020-05-20 17:03:58 +02:00
def incr_module_timeout_statistic(module_name):
curr_date = datetime.date.today()
2022-09-08 10:31:57 +02:00
r_statistics.hincrby(curr_date.strftime("%Y%m%d"), 'paste_by_modules_timeout:{}'.format(module_name), 1)
def create_item_statistics(item_id, source, size):
pass
def get_item_sources():
return r_statistics.smembers('all_provider_set')
def get_nb_items_processed_by_day_and_source(date, source):
nb_items = r_statistics.hget(f'{source}_num', date)
if not nb_items:
nb_items = 0
return int(nb_items)
def get_items_total_size_by_day_and_source(date, source):
total_size = r_statistics.hget(f'{source}_size', date)
if not total_size:
total_size = 0
return float(total_size)
def get_items_av_size_by_day_and_source(date, source):
av_size = r_statistics.hget(f'{source}_avg', date)
if not av_size:
av_size = 0
return float(av_size)
def _create_item_stats_size_nb(date, source, num, size, avg):
r_statistics.hset(f'{source}_num', date, num)
r_statistics.hset(f'{source}_size', date, size)
r_statistics.hset(f'{source}_avg', date, avg)
def get_item_stats_size_avg_by_date():
return r_statistics.zrange(f'top_avg_size_set_{date}', 0, -1, withscores=True)
def get_item_stats_nb_by_date():
return r_statistics.zrange(f'providers_set_{date}', 0, -1, withscores=True)
def _set_item_stats_nb_by_date(date, source):
return r_statistics.zrange(f'providers_set_{date}', )
# # TODO: load ZSET IN CACHE => FAST UPDATE
def update_item_stats_size_nb(item_id, source, size, date):
# Add/Update in Redis
r_statistics.sadd('all_provider_set', source)
nb_items = int(r_statistics.hincrby(f'{source}_num', date, 1))
sum_size = float(r_statistics.hincrbyfloat(f'{source}_size', date, size))
new_avg = sum_size / nb_items
r_statistics.hset(f'{source}_avg', date, new_avg)
# TOP Items Size
if r_statistics.zcard(f'top_size_set_{date}') < PIE_CHART_MAX_CARDINALITY:
r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg})
2022-09-08 10:31:57 +02:00
else:
member_set = r_statistics.zrangebyscore(f'top_avg_size_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
# Member set is a list of (value, score) pairs
if float(member_set[0][1]) < new_avg:
# remove min from set and add the new one
r_statistics.zrem(f'top_avg_size_set_{date}', member_set[0][0])
r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg})
2022-09-08 10:31:57 +02:00
# TOP Nb Items
if r_statistics.zcard(f'providers_set_{date}') < PIE_CHART_MAX_CARDINALITY or r_statistics.zscore(f'providers_set_{date}', source) != None:
r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)})
2022-09-08 10:31:57 +02:00
else: # zset at full capacity
member_set = r_statistics.zrangebyscore(f'providers_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < nb_items:
# remove min from set and add the new one
r_statistics.zrem(member_set[0][0])
r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)})
2022-09-08 10:31:57 +02:00
# keyword num
def _add_module_stats(module_name, total_sum, keyword, date):
r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(total_sum)})
2022-09-08 10:31:57 +02:00
# # TODO: ONE HSET BY MODULE / CUSTOM STATS
def update_module_stats(module_name, num, keyword, date):
# Add/Update in Redis
r_statistics.hincrby(date, f'{module_name}-{keyword}', int(num)) # # TODO: RENAME ME !!!!!!!!!!!!!!!!!!!!!!!!!
# Compute Most Posted
# check if this keyword is eligible for progression
keyword_total_sum = 0
curr_value = r_statistics.hget(date, f'{module_name}-{keyword}')
2022-09-08 10:31:57 +02:00
keyword_total_sum += int(curr_value) if curr_value is not None else 0
if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY:
r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)})
else: # zset at full capacity
2022-09-08 10:31:57 +02:00
member_set = r_statistics.zrangebyscore(f'top_{module_name}_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < keyword_total_sum:
# remove min from set and add the new one
2022-09-08 10:31:57 +02:00
r_statistics.zrem(f'top_{module_name}_set_{date}', member_set[0][0])
r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)})
2022-09-08 10:31:57 +02:00
def get_module_tld_stats_by_tld_date(date, tld):
nb_tld = r_statistics.hget(f'credential_by_tld:{date}', tld)
if not nb_tld:
nb_tld = 0
return int(nb_tld)
def get_module_tld_stats_by_date(module, date):
return r_statistics.hgetall(f'{module}_by_tld:{date}')
def add_module_tld_stats_by_date(module, date, tld, nb):
r_statistics.hincrby(f'{module}_by_tld:{date}', tld, int(nb))
# r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
# r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d'))