mirror of https://github.com/CIRCL/AIL-framework
chg: [Domain + Date] get domain up range + get date days and months by daterange
parent
b6cd16e663
commit
1f97159413
|
@ -100,6 +100,58 @@ def sanathyse_port(port, domain, domain_type, strict=False, current_port=None):
|
||||||
def is_domain_up(domain, domain_type):
    '''
    Check whether a 'ports' field is stored in the metadata hash of a domain.

    :param domain: domain name
    :param domain_type: domain type, used as prefix of the metadata key

    :return: result of the HEXISTS lookup on '<domain_type>_metadata:<domain>'
    '''
    metadata_key = '{}_metadata:{}'.format(domain_type, domain)
    return r_serv_onion.hexists(metadata_key, 'ports')
|
||||||
|
|
||||||
|
def get_domain_first_up(domain, domain_type, ports=None):
    '''
    Get the oldest epoch stored in the crawler history of a domain.

    The first (lowest score) entry of each port history is read and the
    smallest epoch over all ports is returned. The entry content itself is
    not inspected.

    :param domain: domain name
    :type domain: str
    :param domain_type: domain type, used in the history key
    :type domain_type: str
    :param ports: list of ports, optional (default: all crawled ports)
    :type ports: list

    :return: oldest history epoch, None if no history was found
    :rtype: int
    '''
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_min = None
    for port in ports:
        history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
        res = r_serv_onion.zrange(history_key, 0, 0, withscores=True)
        # a port without any history entry must not break the lookup
        if not res:
            continue
        epoch = int(res[0][1])
        # explicit None check: an epoch of 0 is a valid minimum
        if epoch_min is None or epoch < epoch_min:
            epoch_min = epoch
    return epoch_min
|
||||||
|
|
||||||
|
def get_last_domain_up_by_port(domain, domain_type, port):
    '''
    Walk the crawler history of a port from the newest entry to the oldest
    and return the epoch of the first entry whose value is not its own epoch.

    :param domain: domain name
    :param domain_type: domain type, used in the history key
    :param port: crawled port

    :return: epoch of the matching entry, None if the history is exhausted
    :rtype: int
    '''
    history_key = 'crawler_history_{}:{}:{}'.format(domain_type, domain, port)
    position = 0
    while True:
        # fetch a single entry, newest first
        entries = r_serv_onion.zrevrange(history_key, position, position, withscores=True)
        # end of the history reached
        if not entries:
            return None
        value, score = entries[0]
        score = int(score)
        # entries that only store their own epoch are skipped
        # NOTE(review): presumably these mark a crawl where the domain was down - confirm
        if value != str(score):
            return score
        position += 1
|
||||||
|
|
||||||
|
def get_domain_last_up(domain, domain_type, ports=None):
    '''
    Get the most recent epoch at which a domain was seen up, over all ports.

    :param domain: domain name
    :type domain: str
    :param domain_type: domain type
    :type domain_type: str
    :param ports: list of ports, optional (default: all crawled ports)
    :type ports: list

    :return: most recent up epoch, 0 if none was found
    :rtype: int
    '''
    if ports is None:
        ports = get_domain_all_ports(domain, domain_type)
    epoch_max = 0
    for port in ports:
        last_epoch_up = get_last_domain_up_by_port(domain, domain_type, port)
        # the per-port lookup returns None when nothing matched;
        # comparing None with an int raises TypeError on Python 3
        if last_epoch_up is not None and last_epoch_up > epoch_max:
            epoch_max = last_epoch_up
    return epoch_max
|
||||||
|
|
||||||
|
def get_domain_up_range(domain, domain_type):
    '''
    Build a dict holding the first and last up epochs of a domain.

    :param domain: domain name
    :param domain_type: domain type

    :return: dict with the keys 'first_seen' and 'last_seen'
    :rtype: dict
    '''
    return {
        'first_seen': get_domain_first_up(domain, domain_type),
        'last_seen': get_domain_last_up(domain, domain_type),
    }
|
||||||
|
|
||||||
def get_domain_all_ports(domain, domain_type):
|
def get_domain_all_ports(domain, domain_type):
|
||||||
'''
|
'''
|
||||||
Return a list of all crawled ports
|
Return a list of all crawled ports
|
||||||
|
@ -399,10 +451,22 @@ def verify_if_domain_exist(domain):
|
||||||
|
|
||||||
def api_verify_if_domain_exist(domain):
    '''
    API helper: build an error response when a domain is unknown.

    :param domain: domain name

    :return: None if verify_if_domain_exist() accepts the domain,
             else an (error dict, 404) tuple
    '''
    if verify_if_domain_exist(domain):
        return None
    return {'status': 'error', 'reason': 'Domain not found'}, 404
|
||||||
|
|
||||||
|
def api_get_domain_up_range(domain, domain_type=None):
    '''
    API helper: return the up range of a domain.

    :param domain: domain name
    :param domain_type: domain type, optional (resolved with get_domain_type() if not set)

    :return: the error tuple of api_verify_if_domain_exist() if there is one,
             else (dict with 'first_seen', 'last_seen' and 'domain', 200)
    :rtype: tuple
    '''
    error = api_verify_if_domain_exist(domain)
    if error:
        return error
    domain_type = domain_type or get_domain_type(domain)
    up_range = get_domain_up_range(domain, domain_type)
    up_range['domain'] = domain
    return up_range, 200
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## CLASS ##
|
## CLASS ##
|
||||||
class Domain(object):
|
class Domain(object):
|
||||||
"""docstring for Domain."""
|
"""docstring for Domain."""
|
||||||
|
|
|
@ -2,6 +2,39 @@
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
from dateutil.rrule import rrule, MONTHLY
|
||||||
|
from dateutil.relativedelta import relativedelta
|
||||||
|
|
||||||
|
def convert_date_str_to_datetime(date_str):
    '''
    Convert a 'YYYYMMDD' string into a datetime.date object.

    Only the first 8 characters are read.

    :param date_str: date, format YYYYMMDD
    :type date_str: str

    :rtype: datetime.date
    '''
    year = int(date_str[0:4])
    month = int(date_str[4:6])
    day = int(date_str[6:8])
    return datetime.date(year, month, day)
|
||||||
|
|
||||||
|
def get_full_month_str(date_from, date_to):
    '''
    List the first day of every full month contained in a date range.

    :param date_from: start of the range
    :param date_to: end of the range

    :return: monthly rrule occurrences (first day of month), without the last one
    :rtype: list
    '''
    # add one day, so a range ending on the last day of a month
    # reaches the first day of the next month
    upper_bound = date_to + relativedelta(days=+1)
    month_starts = list(rrule(MONTHLY, bymonthday=1, dtstart=date_from, until=upper_bound))
    # the last occurrence starts an incomplete month: drop it
    # (slicing an empty list is a no-op)
    return month_starts[:-1]
|
||||||
|
|
||||||
|
def get_date_range_full_month_and_days(date_from, date_to):
    '''
    Split a date range into its full months and the remaining single days.

    :param date_from: start of the range, format YYYYMMDD
    :type date_from: str
    :param date_to: end of the range, format YYYYMMDD
    :type date_to: str

    :return: (list of days outside the full months, list of full months 'YYYYMM')
    :rtype: tuple
    '''
    date_from = convert_date_str_to_datetime(date_from)
    date_to = convert_date_str_to_datetime(date_to)

    full_month = get_full_month_str(date_from, date_to)

    # no full month in the range: indexing full_month would raise IndexError,
    # every day of the range is listed instead
    if not full_month:
        day_list = substract_date(date_from.strftime('%Y%m%d'), date_to.strftime('%Y%m%d'))
        return day_list, []

    # days before the first full month
    day_list = substract_date(date_from.strftime('%Y%m%d'), full_month[0].strftime('%Y%m%d'))
    # remove last day (first day of the first full month)
    if day_list:
        day_list = day_list[:-1]
    # days after the last full month
    next_month = full_month[-1] + relativedelta(months=+1)
    day_list.extend(substract_date(next_month.strftime('%Y%m%d'), date_to.strftime('%Y%m%d')))

    full_month = [dt_month.strftime('%Y%m') for dt_month in full_month]
    return day_list, full_month
|
||||||
|
|
||||||
# # TODO: refactor me
|
# # TODO: refactor me
|
||||||
|
|
||||||
class Date(object):
|
class Date(object):
|
||||||
|
@ -81,3 +114,23 @@ def substract_date(date_from, date_to):
|
||||||
date = date_from + datetime.timedelta(i)
|
date = date_from + datetime.timedelta(i)
|
||||||
l_date.append( date.strftime('%Y%m%d') )
|
l_date.append( date.strftime('%Y%m%d') )
|
||||||
return l_date
|
return l_date
|
||||||
|
|
||||||
|
def validate_str_date(str_date, separator=''):
    '''
    Check that a string is a valid date.

    :param str_date: date to check, format YYYY<separator>MM<separator>DD
    :type str_date: str
    :param separator: separator between year, month and day, optional
    :type separator: str

    :return: True if the date can be parsed, else False
    :rtype: bool
    '''
    try:
        datetime.datetime.strptime(str_date, '%Y{}%m{}%d'.format(separator, separator))
    # TypeError: str_date is not a string (ex: None, missing parameter)
    except (ValueError, TypeError):
        return False
    return True
|
||||||
|
|
||||||
|
def _parse_str_date(str_date, date_format):
    # Return the parsed datetime.date, None if str_date is not a valid date string
    try:
        return datetime.datetime.strptime(str_date, date_format).date()
    except (ValueError, TypeError):
        return None


def sanitise_date_range(date_from, date_to, separator=''):
    '''
    Check/Return a correct date_from and date_to

    An invalid date is replaced by today. If date_from is after date_to,
    date_from is set to date_to.

    :param date_from: start of the range, format YYYY<separator>MM<separator>DD
    :type date_from: str
    :param date_to: end of the range, format YYYY<separator>MM<separator>DD
    :type date_to: str
    :param separator: separator between year, month and day, optional
    :type separator: str

    :return: dict with the keys 'date_from' and 'date_to'
    :rtype: dict
    '''
    date_format = '%Y{}%m{}%d'.format(separator, separator)
    today = datetime.date.today()

    parsed_from = _parse_str_date(date_from, date_format)
    if parsed_from is None:
        parsed_from = today
        # keep the fallback in the same format as the validated dates
        date_from = today.strftime(date_format)
    parsed_to = _parse_str_date(date_to, date_format)
    if parsed_to is None:
        parsed_to = today
        date_to = today.strftime(date_format)

    # compare the parsed dates: int() on the strings fails with a separator
    if parsed_from > parsed_to:
        date_from = date_to
    return {"date_from": date_from, "date_to": date_to}
|
||||||
|
|
Loading…
Reference in New Issue