chg: [Domain + Date] get domain up range + get date days and months by daterange

pull/453/head
Terrtia 2019-12-13 16:57:24 +01:00
parent b6cd16e663
commit 1f97159413
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
2 changed files with 118 additions and 1 deletions

View File

@ -100,6 +100,58 @@ def sanathyse_port(port, domain, domain_type, strict=False, current_port=None):
def is_domain_up(domain, domain_type):
    '''
    Check whether the metadata hash of a domain holds a 'ports' field.

    :param domain: domain name
    :param domain_type: domain type, used as prefix of the metadata key
    :return: result of ``r_serv_onion.hexists`` on '<domain_type>_metadata:<domain>'
    '''
    # NOTE(review): presumably the 'ports' field is only written once the
    # domain has been crawled successfully -- confirm against the crawler.
    metadata_key = '{}_metadata:{}'.format(domain_type, domain)
    return r_serv_onion.hexists(metadata_key, 'ports')
def get_domain_first_up(domain, domain_type, ports=None):
    '''
    Get the first time a domain was seen, across all given ports.

    For each port, the lowest-scored entry of the sorted set
    'crawler_history_<domain_type>:<domain>:<port>' is read and the smallest
    score over all ports is returned.

    :param domain: domain name
    :param domain_type: domain type, used to build the history key
    :param ports: list of ports, optional (defaults to get_domain_all_ports())
    :type ports: list
    :return: domain first up epoch, or None if no history entry exists
    :rtype: int or None
    '''
    # NOTE(review): unlike get_last_domain_up_by_port(), entries whose member
    # equals their own epoch are not skipped here -- confirm this is intended.
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_min = None
    for port in ports:
        history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
        res = r_serv_onion.zrange(history_key, 0, 0, withscores=True)
        # A port without any history yields an empty list: skip it instead of
        # failing on res[0].
        if not res:
            continue
        epoch = int(res[0][1])
        # Compare against None explicitly so that an epoch of 0 is not
        # mistaken for "nothing found yet".
        if epoch_min is None or epoch < epoch_min:
            epoch_min = epoch
    return epoch_min
def get_last_domain_up_by_port(domain, domain_type, port):
    '''
    Get the most recent history epoch of a port whose member is not the
    epoch itself.

    The sorted set 'crawler_history_<domain_type>:<domain>:<port>' is walked
    from the highest score downwards, one entry per query.

    :param domain: domain name
    :param domain_type: domain type, used to build the history key
    :param port: crawled port
    :return: epoch of the first matching entry, or None if there is none
    :rtype: int or None
    '''
    history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
    rank = 0
    while True:
        entry = r_serv_onion.zrevrange(history_key, rank, rank, withscores=True)
        if not entry:
            # walked past the oldest entry without finding a match
            return None
        member, score = entry[0]
        score = int(score)
        if member != str(score):
            return score
        # NOTE(review): a member equal to its own epoch presumably records a
        # crawl where the domain was down -- confirm against the crawler.
        rank += 1
def get_domain_last_up(domain, domain_type, ports=None):
    '''
    Get the last time a domain was up, across all given ports.

    :param domain: domain name
    :param domain_type: domain type, used to build the history key
    :param ports: list of ports, optional (defaults to get_domain_all_ports())
    :type ports: list
    :return: highest epoch returned by get_last_domain_up_by_port() over all
             ports, or 0 if no port has one
    :rtype: int
    '''
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_max = 0
    for port in ports:
        last_epoch_up = get_last_domain_up_by_port(domain, domain_type, port)
        # get_last_domain_up_by_port() returns None when the port has no
        # usable history; comparing None with an int raises TypeError.
        if last_epoch_up is not None and last_epoch_up > epoch_max:
            epoch_max = last_epoch_up
    return epoch_max
def get_domain_up_range(domain, domain_type):
    '''
    Get the first and last up epochs of a domain.

    :param domain: domain name
    :param domain_type: domain type
    :return: dict with 'first_seen' (from get_domain_first_up) and
             'last_seen' (from get_domain_last_up)
    :rtype: dict
    '''
    first_up = get_domain_first_up(domain, domain_type)
    last_up = get_domain_last_up(domain, domain_type)
    return {'first_seen': first_up, 'last_seen': last_up}
def get_domain_all_ports(domain, domain_type):
'''
Return a list of all crawled ports
@ -399,10 +451,22 @@ def verify_if_domain_exist(domain):
def api_verify_if_domain_exist(domain):
    '''
    API helper: build an error response if the domain is unknown.

    :param domain: domain name
    :return: ({'status': 'error', 'reason': 'Domain not found'}, 404) when
             verify_if_domain_exist() is falsy, else None
    :rtype: tuple or None
    '''
    if not verify_if_domain_exist(domain):
        # A single return: the second, identical one could never be reached.
        return {'status': 'error', 'reason': 'Domain not found'}, 404
    return None
def api_get_domain_up_range(domain, domain_type=None):
    '''
    API helper: get the up range of a domain.

    :param domain: domain name
    :param domain_type: domain type, optional (resolved with
                        get_domain_type() when falsy)
    :return: the error response of api_verify_if_domain_exist() if any, else
             (dict with 'first_seen', 'last_seen' and 'domain', 200)
    :rtype: tuple
    '''
    error_response = api_verify_if_domain_exist(domain)
    if error_response:
        return error_response
    resolved_type = domain_type if domain_type else get_domain_type(domain)
    up_range = get_domain_up_range(domain, resolved_type)
    up_range['domain'] = domain
    return up_range, 200
## CLASS ##
class Domain(object):
"""docstring for Domain."""

View File

@ -2,6 +2,39 @@
import datetime
from dateutil.rrule import rrule, MONTHLY
from dateutil.relativedelta import relativedelta
def convert_date_str_to_datetime(date_str):
    '''
    Build a ``datetime.date`` from a 'YYYYMMDD' string.

    Only the first eight characters are read. Despite the function name, the
    result is a ``datetime.date``, not a ``datetime.datetime``.

    :param date_str: date, 'YYYYMMDD'
    :type date_str: str
    :rtype: datetime.date
    '''
    year = int(date_str[:4])
    month = int(date_str[4:6])
    day = int(date_str[6:8])
    return datetime.date(year, month, day)
def get_full_month_str(date_from, date_to):
    '''
    List the first day of every month fully covered by a date range.

    :param date_from: start of the range
    :param date_to: end of the range
    :return: the values yielded by ``rrule`` for each day 1 of a month
             between date_from and date_to + 1 day, minus the last one
    :rtype: list
    '''
    # Push the upper bound one day forward so that a range ending on the last
    # day of a month still reaches the first day of the following month.
    range_end = date_to + relativedelta(days=+1)
    month_starts = list(rrule(MONTHLY, bymonthday=1, dtstart=date_from, until=range_end))
    # The last month start found opens a month the range does not complete;
    # slicing an empty list is a no-op, so no emptiness check is needed.
    return month_starts[:-1]
def get_date_range_full_month_and_days(date_from, date_to):
    '''
    Split a date range into loose days and complete months.

    :param date_from: start of the range, 'YYYYMMDD'
    :type date_from: str
    :param date_to: end of the range, 'YYYYMMDD'
    :type date_to: str
    :return: (day_list, full_month): the days ('YYYYMMDD') that are not part
             of a complete month, and the complete months ('YYYYMM')
    :rtype: tuple
    '''
    dt_from = convert_date_str_to_datetime(date_from)
    dt_to = convert_date_str_to_datetime(date_to)

    full_month = get_full_month_str(dt_from, dt_to)
    if not full_month:
        # No complete month in the range: every day is a loose day.
        # Indexing full_month[0] here would raise IndexError.
        return substract_date(dt_from.strftime('%Y%m%d'), dt_to.strftime('%Y%m%d')), []

    # Days before the first complete month.
    # NOTE(review): substract_date() appears to include both bounds, so the
    # last element (first day of the first complete month) is dropped -- confirm.
    day_list = substract_date(dt_from.strftime('%Y%m%d'), full_month[0].strftime('%Y%m%d'))
    if day_list:
        day_list = day_list[:-1]

    # Days after the last complete month.
    next_month_start = full_month[-1] + relativedelta(months=+1)
    day_list.extend(substract_date(next_month_start.strftime('%Y%m%d'), dt_to.strftime('%Y%m%d')))

    return day_list, [dt_month.strftime('%Y%m') for dt_month in full_month]
# # TODO: refactor me
class Date(object):
@ -81,3 +114,23 @@ def substract_date(date_from, date_to):
date = date_from + datetime.timedelta(i)
l_date.append( date.strftime('%Y%m%d') )
return l_date
def validate_str_date(str_date, separator=''):
    '''
    Check that a string is a valid date.

    :param str_date: date to check, 'YYYY<separator>MM<separator>DD'
    :param separator: string placed between year, month and day, optional
    :type separator: str
    :return: True if ``datetime.datetime.strptime`` accepts the date,
             False otherwise (including non-string input such as None)
    :rtype: bool
    '''
    try:
        datetime.datetime.strptime(str_date, '%Y{}%m{}%d'.format(separator, separator))
    except (TypeError, ValueError):
        # ValueError: malformed or impossible date.
        # TypeError: str_date is not a string (e.g. a missing request argument).
        return False
    return True
def sanitise_date_range(date_from, date_to, separator=''):
    '''
    Check/Return a correct date_from and date_to.

    An invalid bound is replaced by today's date, and date_from is lowered to
    date_to when it is the later of the two. Each bound is parsed once with
    ``datetime.datetime.strptime``, which both validates it and gives a value
    that can be compared whatever the separator is.

    :param date_from: start of the range, 'YYYY<separator>MM<separator>DD'
    :param date_to: end of the range, same format
    :param separator: string placed between year, month and day, optional
    :type separator: str
    :return: {"date_from": ..., "date_to": ...}, both in the requested format
    :rtype: dict
    '''
    date_format = '%Y{}%m{}%d'.format(separator, separator)
    # Read the clock once so both fallbacks agree even around midnight.
    today = datetime.date.today()

    def _parse(str_date):
        # Return the parsed date, or None when str_date is not a valid date.
        try:
            return datetime.datetime.strptime(str_date, date_format).date()
        except (TypeError, ValueError):
            return None

    parsed_from = _parse(date_from)
    if parsed_from is None:
        parsed_from = today
        # Keep the fallback in the caller's format, separator included.
        date_from = today.strftime(date_format)

    parsed_to = _parse(date_to)
    if parsed_to is None:
        parsed_to = today
        date_to = today.strftime(date_format)

    # Compare dates, not int(string): int() fails as soon as a separator is used.
    if parsed_from > parsed_to:
        date_from = date_to
    return {"date_from": date_from, "date_to": date_to}