2020-05-20 17:03:58 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
|
|
|
|
import datetime
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
|
2022-11-28 15:01:40 +01:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
|
|
|
from lib import ConfigLoader
|
2020-05-20 17:03:58 +02:00
|
|
|
|
|
|
|
# Open the statistics database connection once, at import time.
r_statistics = ConfigLoader.ConfigLoader().get_db_conn("Kvrocks_Stats")
# The loader is only needed to build the connection; the module-level name is
# kept but bound to None.
config_loader = None

# Upper bound on the number of members kept in each "top" sorted set.
PIE_CHART_MAX_CARDINALITY = 8
|
|
|
|
|
2020-05-20 17:03:58 +02:00
|
|
|
def incr_module_timeout_statistic(module_name):
    """Increment by one today's timeout counter for *module_name*.

    The counter is the field ``paste_by_modules_timeout:<module_name>`` of the
    hash whose key is the current local date formatted as ``YYYYMMDD``.
    """
    day_key = datetime.date.today().strftime("%Y%m%d")
    counter_field = 'paste_by_modules_timeout:{}'.format(module_name)
    r_statistics.hincrby(day_key, counter_field, 1)
|
|
|
|
|
|
|
|
def create_item_statistics(item_id, source, size):
    """Placeholder: accepts the item description, does nothing, returns ``None``."""
    return None
|
|
|
|
|
|
|
|
def get_item_sources():
    """Return all members of the ``all_provider_set`` set."""
    sources = r_statistics.smembers('all_provider_set')
    return sources
|
|
|
|
|
|
|
|
def get_nb_items_processed_by_day_and_source(date, source):
    """Return, as an int, the value stored for *date* in the ``<source>_num`` hash.

    A missing (or otherwise falsy) stored value yields ``0``.
    """
    stored = r_statistics.hget(f'{source}_num', date)
    return int(stored) if stored else 0
|
|
|
|
|
|
|
|
def get_items_total_size_by_day_and_source(date, source):
    """Return, as a float, the value stored for *date* in the ``<source>_size`` hash.

    A missing (or otherwise falsy) stored value yields ``0.0``.
    """
    stored = r_statistics.hget(f'{source}_size', date)
    # Fall back to 0 before converting so an absent field becomes 0.0.
    return float(stored or 0)
|
|
|
|
|
|
|
|
def get_items_av_size_by_day_and_source(date, source):
    """Return, as a float, the value stored for *date* in the ``<source>_avg`` hash.

    A missing (or otherwise falsy) stored value yields ``0.0``.
    """
    stored = r_statistics.hget(f'{source}_avg', date)
    if stored:
        return float(stored)
    return float(0)
|
|
|
|
|
|
|
|
def _create_item_stats_size_nb(date, source, num, size, avg):
    """Write the three per-source values for *date*.

    Sets field *date* of the hashes ``<source>_num``, ``<source>_size`` and
    ``<source>_avg`` to *num*, *size* and *avg* respectively, in that order.
    """
    per_hash_values = (
        (f'{source}_num', num),
        (f'{source}_size', size),
        (f'{source}_avg', avg),
    )
    for hash_key, value in per_hash_values:
        r_statistics.hset(hash_key, date, value)
|
|
|
|
|
|
|
|
def get_item_stats_size_avg_by_date(date=None):
    """Return every member of ``top_avg_size_set_<date>`` together with its score.

    The previous version referenced ``date`` without defining it, so every
    call raised ``NameError``. ``date`` is now an optional parameter, which
    keeps zero-argument calls valid.

    Args:
        date: Day identifier used to build the sorted-set key. When omitted,
            today's local date formatted as ``YYYYMMDD`` is used.
            NOTE(review): the format is assumed from the ``strftime("%Y%m%d")``
            call used for daily keys elsewhere in this module -- confirm.

    Returns:
        The client's result for the full range (index 0 to -1) with scores.
    """
    if date is None:
        date = datetime.date.today().strftime("%Y%m%d")
    return r_statistics.zrange(f'top_avg_size_set_{date}', 0, -1, withscores=True)
|
|
|
|
|
|
|
|
def get_item_stats_nb_by_date(date=None):
    """Return every member of ``providers_set_<date>`` together with its score.

    The previous version referenced ``date`` without defining it, so every
    call raised ``NameError``. ``date`` is now an optional parameter, which
    keeps zero-argument calls valid.

    Args:
        date: Day identifier used to build the sorted-set key. When omitted,
            today's local date formatted as ``YYYYMMDD`` is used.
            NOTE(review): the format is assumed from the ``strftime("%Y%m%d")``
            call used for daily keys elsewhere in this module -- confirm.

    Returns:
        The client's result for the full range (index 0 to -1) with scores.
    """
    if date is None:
        date = datetime.date.today().strftime("%Y%m%d")
    return r_statistics.zrange(f'providers_set_{date}', 0, -1, withscores=True)
|
|
|
|
|
|
|
|
def _set_item_stats_nb_by_date(date, source):
    """Call ``zrange`` on ``providers_set_<date>`` with the key as only argument.

    NOTE(review): this helper looks unfinished. ``source`` is never used and
    ``zrange`` receives no start/end indexes; with a redis-py style client
    (presumed, not visible here) that call raises ``TypeError``. The intended
    behaviour cannot be determined from this file, so it is left unchanged.
    """
    zset_key = f'providers_set_{date}'
    return r_statistics.zrange(zset_key)
|
|
|
|
|
|
|
|
# TODO: load the ZSETs into a cache => faster updates
|
|
|
|
def _zadd_capped_top_set(zset_key, member, score):
    """Insert or update *member* in a sorted set capped at PIE_CHART_MAX_CARDINALITY.

    * If the set has room, or *member* is already in it, its score is simply
      (re)written.
    * Otherwise the lowest-scored member is evicted and replaced, but only
      when *score* is strictly greater than that lowest score.
    """
    has_room = r_statistics.zcard(zset_key) < PIE_CHART_MAX_CARDINALITY
    if has_room or r_statistics.zscore(zset_key, member) is not None:
        r_statistics.zadd(zset_key, {member: float(score)})
        return

    # Set is at full capacity: fetch the single lowest-scored entry.
    # The result is a list of (member, score) pairs.
    lowest = r_statistics.zrangebyscore(zset_key, '-inf', '+inf', withscores=True, start=0, num=1)
    lowest_member, lowest_score = lowest[0]
    if float(lowest_score) < score:
        # Remove the current minimum and add the new member in its place.
        r_statistics.zrem(zset_key, lowest_member)
        r_statistics.zadd(zset_key, {member: float(score)})


def update_item_stats_size_nb(item_id, source, size, date):
    """Record one more item of *size* for *source* on *date* and refresh the tops.

    Steps:
      1. register *source* in ``all_provider_set``;
      2. increment ``<source>_num[date]`` by one and ``<source>_size[date]``
         by *size*, then store their ratio in ``<source>_avg[date]``;
      3. update the capped sorted sets ``top_avg_size_set_<date>`` (score:
         average size) and ``providers_set_<date>`` (score: item count).

    Fixes relative to the previous version:
      * the capacity check for the average-size top read ``top_size_set_<date>``,
        a key this function never writes, so the cap was never enforced; it
        now checks ``top_avg_size_set_<date>``;
      * the eviction in the item-count top called ``zrem`` without the key
        name; the key is now passed;
      * a source already present in the average-size top is now updated in
        place instead of possibly evicting another member (same rule the
        item-count top already applied).

    Args:
        item_id: Unused; kept so existing callers keep working.
        source: Name of the item source (provider).
        size: Size of the item, added to the running total for the day.
        date: Day identifier used as hash field and in the sorted-set keys.
    """
    # Add/Update in Redis
    r_statistics.sadd('all_provider_set', source)

    nb_items = int(r_statistics.hincrby(f'{source}_num', date, 1))
    sum_size = float(r_statistics.hincrbyfloat(f'{source}_size', date, size))
    new_avg = sum_size / nb_items
    r_statistics.hset(f'{source}_avg', date, new_avg)

    # TOP Items Size
    _zadd_capped_top_set(f'top_avg_size_set_{date}', source, new_avg)

    # TOP Nb Items
    _zadd_capped_top_set(f'providers_set_{date}', source, nb_items)
|
2022-09-08 10:31:57 +02:00
|
|
|
|
|
|
|
# keyword num
|
|
|
|
|
|
|
|
def _add_module_stats(module_name, total_sum, keyword, date):
    """Set *keyword*'s score to ``float(total_sum)`` in ``top_<module_name>_set_<date>``.

    No capacity check is applied here.
    """
    zset_key = f'top_{module_name}_set_{date}'
    score = float(total_sum)
    r_statistics.zadd(zset_key, {keyword: score})
|
2022-09-08 10:31:57 +02:00
|
|
|
|
|
|
|
# TODO: use one HSET per module / custom stats
|
|
|
|
def update_module_stats(module_name, num, keyword, date):
    """Add *num* to a module/keyword daily counter and refresh the module's top set.

    The counter is the field ``<module_name>-<keyword>`` of the hash keyed by
    *date*. The new total is then pushed to the sorted set
    ``top_<module_name>_set_<date>``, which is capped at
    ``PIE_CHART_MAX_CARDINALITY`` members.

    Changes relative to the previous version:
      * when the top set is full and *keyword* is already a member, its score
        is now updated in place. Previously the lowest-scored member (possibly
        a different keyword) was evicted first, shrinking the set;
      * the total is taken from the value returned by ``hincrby`` instead of
        a second ``hget`` round trip.

    Args:
        module_name: Name of the module the statistic belongs to.
        num: Increment to apply (converted with ``int``).
        keyword: Keyword being counted.
        date: Day identifier used as hash key and in the sorted-set key.
    """
    # Add/Update in Redis
    # TODO: rename this hash field
    keyword_total_sum = int(r_statistics.hincrby(date, f'{module_name}-{keyword}', int(num)))

    # Compute Most Posted: check whether this keyword is eligible for the top.
    top_key = f'top_{module_name}_set_{date}'
    has_room = r_statistics.zcard(top_key) < PIE_CHART_MAX_CARDINALITY
    if has_room or r_statistics.zscore(top_key, keyword) is not None:
        r_statistics.zadd(top_key, {keyword: float(keyword_total_sum)})
        return

    # zset at full capacity: fetch the single lowest-scored entry.
    # The result is a list of (member, score) pairs.
    lowest = r_statistics.zrangebyscore(top_key, '-inf', '+inf', withscores=True, start=0, num=1)
    lowest_member, lowest_score = lowest[0]
    if int(lowest_score) < keyword_total_sum:
        # remove min from set and add the new one
        r_statistics.zrem(top_key, lowest_member)
        r_statistics.zadd(top_key, {keyword: float(keyword_total_sum)})
|
2022-09-08 10:31:57 +02:00
|
|
|
|
|
|
|
def get_module_tld_stats_by_tld_date(date, tld, module='credential'):
    """Return, as an int, the counter stored for *tld* in ``<module>_by_tld:<date>``.

    The key used to be hard-coded to the ``credential`` module, while the
    sibling helpers that read and write these hashes take the module as a
    parameter. ``module`` is now optional and defaults to ``'credential'``,
    so existing two-argument calls read the same key as before.

    Args:
        date: Day identifier used in the hash key.
        tld: Hash field to read.
        module: Module name prefix of the hash key.

    Returns:
        The stored value converted with ``int``; ``0`` when the field is
        missing (or otherwise falsy).
    """
    nb_tld = r_statistics.hget(f'{module}_by_tld:{date}', tld)
    return int(nb_tld) if nb_tld else 0
|
|
|
|
|
|
|
|
def get_module_tld_stats_by_date(module, date):
    """Return all fields and values of the ``<module>_by_tld:<date>`` hash."""
    hash_key = f'{module}_by_tld:{date}'
    return r_statistics.hgetall(hash_key)
|
|
|
|
|
|
|
|
def add_module_tld_stats_by_date(module, date, tld, nb):
    """Increment field *tld* of the ``<module>_by_tld:<date>`` hash by ``int(nb)``."""
    hash_key = f'{module}_by_tld:{date}'
    increment = int(nb)
    r_statistics.hincrby(hash_key, tld, increment)
|
|
|
|
|
2022-11-29 16:01:01 +01:00
|
|
|
# r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
|
|
|
|
# r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
|