mirror of https://github.com/CIRCL/AIL-framework
chg: [Domain + Date] get domain up range + get date days and months by daterange
parent
b6cd16e663
commit
1f97159413
|
@ -100,6 +100,58 @@ def sanathyse_port(port, domain, domain_type, strict=False, current_port=None):
|
|||
def is_domain_up(domain, domain_type):
    '''
    Check whether the domain metadata hash contains a 'ports' field.

    :param domain: domain name
    :param domain_type: domain type, used as prefix of the metadata key

    :return: result of HEXISTS on '<domain_type>_metadata:<domain>' for the field 'ports'
    '''
    metadata_key = '{}_metadata:{}'.format(domain_type, domain)
    return r_serv_onion.hexists(metadata_key, 'ports')
|
||||
|
||||
def get_domain_first_up(domain, domain_type, ports=None):
    '''
    Get the oldest epoch recorded in the crawler history of a domain.

    :param domain: domain name
    :param domain_type: domain type, used to build the history key
    :param ports: list of ports, optional (all crawled ports when omitted)
    :type ports: list

    :return: lowest epoch found across the history of all ports,
             None when no port has any history
    :rtype: int or None
    '''
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_min = None
    for port in ports:
        history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
        # lowest-scored entry of this port's history
        res = r_serv_onion.zrange(history_key, 0, 0, withscores=True)
        # a port without history yields an empty result: nothing to compare
        if not res:
            continue
        epoch = int(res[0][1])
        # compare against None explicitly so that an epoch of 0 is kept
        if epoch_min is None or epoch < epoch_min:
            epoch_min = epoch
    return epoch_min
|
||||
|
||||
def get_last_domain_up_by_port(domain, domain_type, port):
    '''
    Walk the crawler history of a port from the highest score downwards and
    return the epoch of the first entry whose member differs from its epoch.

    :param domain: domain name
    :param domain_type: domain type, used to build the history key
    :param port: crawled port

    :return: epoch of the matching entry, None when the history is exhausted
    :rtype: int or None
    '''
    history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
    rank = 0
    while True:
        entries = r_serv_onion.zrevrange(history_key, rank, rank, withscores=True)
        # no (more) history
        if not entries:
            return None
        member, score = entries[0]
        score = int(score)
        if member != str(score):
            return score
        # member is its own epoch: skip this entry
        # NOTE(review): presumably marks a crawl where the domain was down - confirm
        rank += 1
|
||||
|
||||
def get_domain_last_up(domain, domain_type, ports=None):
    '''
    Get the most recent epoch returned by get_last_domain_up_by_port
    across the ports of a domain.

    :param domain: domain name
    :param domain_type: domain type
    :param ports: list of ports, optional (all crawled ports when omitted)
    :type ports: list

    :return: highest epoch found, 0 when no port returned an epoch
    :rtype: int
    '''
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_max = 0
    for port in ports:
        last_epoch_up = get_last_domain_up_by_port(domain, domain_type, port)
        # None means no matching history entry for this port;
        # comparing None with an int raises TypeError on Python 3
        if last_epoch_up is not None and last_epoch_up > epoch_max:
            epoch_max = last_epoch_up
    return epoch_max
|
||||
|
||||
def get_domain_up_range(domain, domain_type):
    '''
    Build the first/last seen range of a domain.

    :param domain: domain name
    :param domain_type: domain type

    :return: dict with 'first_seen' (get_domain_first_up) and
             'last_seen' (get_domain_last_up)
    :rtype: dict
    '''
    return {
        'first_seen': get_domain_first_up(domain, domain_type),
        'last_seen': get_domain_last_up(domain, domain_type),
    }
|
||||
|
||||
def get_domain_all_ports(domain, domain_type):
|
||||
'''
|
||||
Return a list of all crawled ports
|
||||
|
@ -399,10 +451,22 @@ def verify_if_domain_exist(domain):
|
|||
|
||||
def api_verify_if_domain_exist(domain):
    '''
    API helper: check that a domain exists.

    :param domain: domain name

    :return: None when the domain exists, otherwise an
             (error dict, 404) tuple
    '''
    if verify_if_domain_exist(domain):
        return None
    return {'status': 'error', 'reason': 'Domain not found'}, 404
|
||||
|
||||
def api_get_domain_up_range(domain, domain_type=None):
    '''
    API helper: return the up range of a domain.

    :param domain: domain name
    :param domain_type: domain type, optional (resolved with get_domain_type when falsy)

    :return: the error tuple from api_verify_if_domain_exist when it returns one,
             otherwise (dict with 'first_seen', 'last_seen' and 'domain', 200)
    '''
    error_response = api_verify_if_domain_exist(domain)
    if error_response:
        return error_response
    resolved_type = domain_type if domain_type else get_domain_type(domain)
    up_range = get_domain_up_range(domain, resolved_type)
    up_range['domain'] = domain
    return up_range, 200
|
||||
|
||||
|
||||
|
||||
## CLASS ##
|
||||
class Domain(object):
|
||||
"""docstring for Domain."""
|
||||
|
|
|
@ -2,6 +2,39 @@
|
|||
|
||||
import datetime
|
||||
|
||||
from dateutil.rrule import rrule, MONTHLY
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
def convert_date_str_to_datetime(date_str):
    '''
    Build a date object from a 'YYYYMMDD' string.

    Despite the function name, a datetime.date is returned. Only the first
    eight characters of date_str are read.

    :param date_str: date, 'YYYYMMDD'
    :type date_str: str

    :rtype: datetime.date
    '''
    year, month, day = date_str[0:4], date_str[4:6], date_str[6:8]
    return datetime.date(int(year), int(month), int(day))
|
||||
|
||||
def get_full_month_str(date_from, date_to):
    '''
    List the first day of every month fully contained in a date range.

    Despite the function name, the rrule occurrences are returned as-is,
    not strings.

    :param date_from: range start
    :param date_to: range end

    :return: first day of each full month
    :rtype: list
    '''
    # push the upper bound one day forward, so that a range ending on the
    # last day of a month reaches the 1st of the following month
    upper_bound = date_to + relativedelta(days=+1)
    month_starts = list(rrule(MONTHLY, bymonthday=1, dtstart=date_from, until=upper_bound))
    # the last month start opens a month that is not completely covered
    return month_starts[:-1]
|
||||
|
||||
def get_date_range_full_month_and_days(date_from, date_to):
    '''
    Split a date range into the months it fully covers and the remaining days.

    :param date_from: range start, 'YYYYMMDD'
    :type date_from: str
    :param date_to: range end, 'YYYYMMDD'
    :type date_to: str

    :return: (day_list, full_month): the days outside any full month as
             returned by substract_date, and the full months as 'YYYYMM' strings
    :rtype: tuple
    '''
    date_from = convert_date_str_to_datetime(date_from)
    date_to = convert_date_str_to_datetime(date_to)

    full_month = get_full_month_str(date_from, date_to)

    if full_month:
        # days before the first full month
        day_list = substract_date(date_from.strftime('%Y%m%d'), full_month[0].strftime('%Y%m%d'))
        # remove last day (first day of the first full month)
        if day_list:
            day_list = day_list[:-1]
        # days after the last full month
        next_month_start = full_month[-1] + relativedelta(months=+1)
        day_list.extend(substract_date(next_month_start.strftime('%Y%m%d'), date_to.strftime('%Y%m%d')))
    else:
        # no full month in the range: every day is returned individually
        day_list = substract_date(date_from.strftime('%Y%m%d'), date_to.strftime('%Y%m%d'))

    full_month = [dt_month.strftime('%Y%m') for dt_month in full_month]
    return day_list, full_month
|
||||
|
||||
# # TODO: refactor me
|
||||
|
||||
class Date(object):
|
||||
|
@ -81,3 +114,23 @@ def substract_date(date_from, date_to):
|
|||
date = date_from + datetime.timedelta(i)
|
||||
l_date.append( date.strftime('%Y%m%d') )
|
||||
return l_date
|
||||
|
||||
def validate_str_date(str_date, separator=''):
    '''
    Check that a string is a valid date in 'YYYY<separator>MM<separator>DD' format.

    :param str_date: date to check
    :param separator: separator between year, month and day, optional
    :type separator: str

    :return: True if str_date can be parsed, False otherwise
             (including non-string input such as None)
    :rtype: bool
    '''
    date_format = '%Y{}%m{}%d'.format(separator, separator)
    try:
        datetime.datetime.strptime(str_date, date_format)
    # ValueError: string does not match the format
    # TypeError: input is not a string (e.g. a missing parameter passed as None)
    except (ValueError, TypeError):
        return False
    return True
|
||||
|
||||
def _parse_str_date(str_date, date_format):
    # Return the date parsed from str_date, or None when it is not a
    # string matching date_format.
    try:
        return datetime.datetime.strptime(str_date, date_format).date()
    except (ValueError, TypeError):
        return None


def sanitise_date_range(date_from, date_to, separator=''):
    '''
    Check/Return a correct date_from and date_to

    An invalid date is replaced by today's date, formatted with the same
    separator. When date_from is after date_to, date_from is set to date_to.

    :param date_from: range start, 'YYYY<separator>MM<separator>DD'
    :param date_to: range end, 'YYYY<separator>MM<separator>DD'
    :param separator: separator between year, month and day, optional
    :type separator: str

    :return: dict with the keys 'date_from' and 'date_to'
    :rtype: dict
    '''
    date_format = '%Y{}%m{}%d'.format(separator, separator)
    today = datetime.date.today()

    parsed_from = _parse_str_date(date_from, date_format)
    if parsed_from is None:
        parsed_from = today
        date_from = today.strftime(date_format)
    parsed_to = _parse_str_date(date_to, date_format)
    if parsed_to is None:
        parsed_to = today
        date_to = today.strftime(date_format)

    # compare parsed dates: int() on the raw strings fails with a separator
    if parsed_from > parsed_to:
        date_from = date_to
    return {"date_from": date_from, "date_to": date_to}
|
||||
|
|
Loading…
Reference in New Issue