# Code introduced by commit 1f971594135f24e6e4da7307138fe793399cfc50
# (Terrtia, Fri, 13 Dec 2019 16:57:24 +0100):
#   chg: [Domain + Date] get domain up range + get date days and months by daterange
# The definitions below are grouped by the file they belong to.

import datetime


# ---------------------------------------------------------------------------
# bin/lib/Domain.py
#
# r_serv_onion, get_domain_all_ports, get_domain_type and
# verify_if_domain_exist are not part of this change; they are expected to be
# provided by the rest of bin/lib/Domain.py.
# ---------------------------------------------------------------------------

def is_domain_up(domain, domain_type):
    '''
    Tell whether the metadata hash of a domain holds a 'ports' field.

    :param domain: domain name
    :param domain_type: domain type, used as prefix of the metadata key
    '''
    return r_serv_onion.hexists('{}_metadata:{}'.format(domain_type, domain), 'ports')


def get_domain_first_up(domain, domain_type, ports=None):
    '''
    Get the oldest epoch stored in the crawler history of a domain.

    :param domain: domain name
    :param domain_type: domain type, used in the history key
    :param ports: list of ports, optional (default: all crawled ports)
    :type ports: list

    :return: oldest epoch over all ports, None if no port has any history
    :rtype: int or None
    '''
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_min = None
    for port in ports:
        history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
        oldest = r_serv_onion.zrange(history_key, 0, 0, withscores=True)
        # a port without history must not abort the lookup for the others
        if not oldest:
            continue
        # NOTE(review): unlike get_last_domain_up_by_port, the oldest entry is
        # used even if its member equals its epoch - confirm this is intended.
        epoch = int(oldest[0][1])
        # compare against None explicitly: 0 is a legal epoch value
        if epoch_min is None or epoch < epoch_min:
            epoch_min = epoch
    return epoch_min


def get_last_domain_up_by_port(domain, domain_type, port):
    '''
    Get the most recent history epoch of a port whose member is not just the
    epoch itself.

    :param domain: domain name
    :param domain_type: domain type, used in the history key
    :param port: crawled port

    :return: epoch of the matching entry, None if the history holds none
    :rtype: int or None
    '''
    history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
    current_index = 0
    # walk the history from the newest entry to the oldest one
    while True:
        res = r_serv_onion.zrevrange(history_key, current_index, current_index, withscores=True)
        if not res:
            # history exhausted
            return None
        item_core, epoch = res[0]
        epoch = int(epoch)
        # NOTE(review): a member equal to its own epoch is presumably the
        # placeholder stored when the domain was down - confirm with the crawler.
        if item_core != str(epoch):
            return epoch
        current_index += 1


def get_domain_last_up(domain, domain_type, ports=None):
    '''
    Get the most recent epoch at which any port of a domain was up.

    :param domain: domain name
    :param domain_type: domain type, used in the history key
    :param ports: list of ports, optional (default: all crawled ports)
    :type ports: list

    :return: newest epoch over all ports, 0 if none was found
    :rtype: int
    '''
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_max = 0
    for port in ports:
        last_epoch_up = get_last_domain_up_by_port(domain, domain_type, port)
        # None (no usable entry for this port) cannot be ordered against an int
        if last_epoch_up is not None and last_epoch_up > epoch_max:
            epoch_max = last_epoch_up
    return epoch_max


def get_domain_up_range(domain, domain_type):
    '''
    Get the first and last epoch recorded for a domain.

    :return: dict with the keys 'first_seen' and 'last_seen'
    :rtype: dict
    '''
    return {
        'first_seen': get_domain_first_up(domain, domain_type),
        'last_seen': get_domain_last_up(domain, domain_type),
    }


def api_verify_if_domain_exist(domain):
    '''
    Return an (error dict, 404) tuple if the domain is unknown, None otherwise.
    '''
    if not verify_if_domain_exist(domain):
        return {'status': 'error', 'reason': 'Domain not found'}, 404
    else:
        return None


def api_get_domain_up_range(domain, domain_type=None):
    '''
    API wrapper around get_domain_up_range.

    :param domain: domain name
    :param domain_type: domain type, optional (resolved with get_domain_type)

    :return: (dict, http status code); the dict holds 'first_seen',
        'last_seen' and 'domain' on success
    :rtype: tuple
    '''
    res = api_verify_if_domain_exist(domain)
    if res:
        return res
    if not domain_type:
        domain_type = get_domain_type(domain)
    res = get_domain_up_range(domain, domain_type)
    res['domain'] = domain
    return res, 200


# ---------------------------------------------------------------------------
# bin/packages/Date.py
#
# substract_date is not part of this change; it is defined further down in
# bin/packages/Date.py and returns a list of 'YYYYMMDD' strings.
# ---------------------------------------------------------------------------

def convert_date_str_to_datetime(date_str):
    '''
    Convert a 'YYYYMMDD' string into a datetime.date.

    :raises ValueError: if the fields are not numeric or not a real date
    '''
    return datetime.date(int(date_str[0:4]), int(date_str[4:6]), int(date_str[6:8]))


def get_full_month_str(date_from, date_to):
    '''
    Get the first day of every complete month inside [date_from, date_to].

    :param date_from: start of the range
    :type date_from: datetime.date
    :param date_to: end of the range (included)
    :type date_to: datetime.date

    :return: list of datetime.datetime, one per complete month
    :rtype: list
    '''
    # imported here so the stdlib-only helpers of this module do not need dateutil
    from dateutil.rrule import rrule, MONTHLY

    # add one day: if date_to is the last day of a month, that month is complete
    date_to = date_to + datetime.timedelta(days=1)
    month_starts = list(rrule(MONTHLY, bymonthday=1, dtstart=date_from, until=date_to))
    # the month opened by the last boundary is incomplete (or out of range)
    return month_starts[:-1]


def get_date_range_full_month_and_days(date_from, date_to):
    '''
    Split a date range into complete months and the remaining single days.

    :param date_from: start of the range, 'YYYYMMDD'
    :type date_from: str
    :param date_to: end of the range, 'YYYYMMDD'
    :type date_to: str

    :return: (list of 'YYYYMMDD' days outside complete months,
        list of 'YYYYMM' complete months)
    :rtype: tuple
    '''
    from dateutil.relativedelta import relativedelta

    date_from = convert_date_str_to_datetime(date_from)
    date_to = convert_date_str_to_datetime(date_to)
    str_from = date_from.strftime('%Y%m%d')
    str_to = date_to.strftime('%Y%m%d')

    full_month = get_full_month_str(date_from, date_to)
    if not full_month:
        # no complete month in the range: every day is returned on its own
        # (assumes substract_date includes both bounds - the slicing below
        # relies on the same behaviour)
        return substract_date(str_from, str_to), []

    # days before the first complete month; the last one returned is the first
    # day of that month and is already covered by it
    day_list = substract_date(str_from, full_month[0].strftime('%Y%m%d'))[:-1]
    # days after the last complete month
    trailing_start = full_month[-1] + relativedelta(months=+1)
    day_list.extend(substract_date(trailing_start.strftime('%Y%m%d'), str_to))

    full_month = [dt_month.strftime('%Y%m') for dt_month in full_month]
    return day_list, full_month


def validate_str_date(str_date, separator=''):
    '''
    Check that a string is a real, zero-padded date: YYYY<sep>MM<sep>DD.

    :param str_date: date to check
    :param separator: separator between the date fields

    :return: True if str_date is a valid date
    :rtype: bool
    '''
    try:
        datetime.datetime.strptime(str_date, '%Y{}%m{}%d'.format(separator, separator))
    except (ValueError, TypeError):
        # TypeError: str_date is not a string (None, int, ...)
        return False
    # strptime also accepts non-padded fields ('2019121'); they would break the
    # fixed-width slicing and int comparison done by the other helpers
    compact = str_date.replace(separator, '') if separator else str_date
    return len(compact) == 8 and compact.isdigit()


def _compact_date_or_default(str_date, separator, default):
    # Return str_date without separators if it is valid, default otherwise.
    if not validate_str_date(str_date, separator=separator):
        return default
    return str_date.replace(separator, '') if separator else str_date


def sanitise_date_range(date_from, date_to, separator=''):
    '''
    Check/Return a correct date_from and date_to

    An invalid date is replaced by today, and date_from is lowered to date_to
    if the range is reversed.

    :param date_from: start of the range, YYYY<sep>MM<sep>DD
    :param date_to: end of the range, YYYY<sep>MM<sep>DD
    :param separator: separator used in the input dates

    :return: dict with the keys 'date_from' and 'date_to', both 'YYYYMMDD'
    :rtype: dict
    '''
    today = datetime.date.today().strftime("%Y%m%d")
    date_from = _compact_date_or_default(date_from, separator, today)
    date_to = _compact_date_or_default(date_to, separator, today)

    if int(date_from) > int(date_to):
        date_from = date_to
    return {"date_from": date_from, "date_to": date_to}