mirror of https://github.com/CIRCL/AIL-framework
fix: [domain explorer] fix search by daterange
parent
1ed4875b55
commit
10fbf228c1
|
@ -478,7 +478,7 @@ def get_all_domains_languages():
|
||||||
def get_domains_by_languages(languages, domain_types):
|
def get_domains_by_languages(languages, domain_types):
|
||||||
if len(languages) == 1:
|
if len(languages) == 1:
|
||||||
if len(domain_types) == 1:
|
if len(domain_types) == 1:
|
||||||
return r_crawler.smembers(f'language:domains:{domain_type[0]}:{languages[0]}')
|
return r_crawler.smembers(f'language:domains:{domain_types[0]}:{languages[0]}')
|
||||||
else:
|
else:
|
||||||
l_keys = []
|
l_keys = []
|
||||||
for domain_type in domain_types:
|
for domain_type in domain_types:
|
||||||
|
@ -523,6 +523,15 @@ def get_domains_down_by_date(date, domain_type):
|
||||||
return r_crawler.smembers(f'{domain_type}_down:{date}')
|
return r_crawler.smembers(f'{domain_type}_down:{date}')
|
||||||
|
|
||||||
def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
|
def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
|
||||||
|
domains = []
|
||||||
|
for date in Date.substract_date(date_from, date_to):
|
||||||
|
if up:
|
||||||
|
domains.extend(get_domains_up_by_date(date, domain_type))
|
||||||
|
if down:
|
||||||
|
domains.extend(get_domains_down_by_date(date, domain_type))
|
||||||
|
return domains
|
||||||
|
|
||||||
|
def get_domains_dates_by_daterange(date_from, date_to, domain_type, up=True, down=False):
|
||||||
date_domains = {}
|
date_domains = {}
|
||||||
for date in Date.substract_date(date_from, date_to):
|
for date in Date.substract_date(date_from, date_to):
|
||||||
domains = []
|
domains = []
|
||||||
|
@ -541,21 +550,26 @@ def get_domains_meta(domains):
|
||||||
metas.append(dom.get_meta())
|
metas.append(dom.get_meta())
|
||||||
return metas
|
return metas
|
||||||
|
|
||||||
# TODO HANDLE ALL MULTIPLE DOMAIN TYPES
|
|
||||||
# TODO ADD TAGS FILTER
|
# TODO ADD TAGS FILTER
|
||||||
def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], nb_obj=28, page=1):
|
def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[], nb_obj=28, page=1):
|
||||||
|
if not domain_types:
|
||||||
|
domain_types = ['onion', 'web']
|
||||||
if not tags:
|
if not tags:
|
||||||
|
domains = []
|
||||||
if not date_from and not date_to:
|
if not date_from and not date_to:
|
||||||
domains = sorted(get_domains_up_by_type(domain_type))
|
for domain_type in domain_types:
|
||||||
|
domains[0:0] = get_domains_up_by_type(domain_type)
|
||||||
else:
|
else:
|
||||||
domains = sorted(get_domains_by_daterange(date_from, date_to, domain_type))
|
for domain_type in domain_types:
|
||||||
|
domains[0:0] = get_domains_by_daterange(date_from, date_to, domain_type)
|
||||||
|
domains = sorted(domains)
|
||||||
domains = paginate_iterator(domains, nb_obj=nb_obj, page=page)
|
domains = paginate_iterator(domains, nb_obj=nb_obj, page=page)
|
||||||
meta = []
|
meta = []
|
||||||
for dom in domains['list_elem']:
|
for dom in domains['list_elem']:
|
||||||
domain = Domain(dom)
|
domain = Domain(dom)
|
||||||
meta.append(domain.get_meta(options={'languages', 'screenshot', 'tags_safe'}))
|
meta.append(domain.get_meta(options={'languages', 'screenshot', 'tags_safe'}))
|
||||||
domains['list_elem'] = meta
|
domains['list_elem'] = meta
|
||||||
domains['domain_type'] = domain_type
|
domains['domain_types'] = domain_types
|
||||||
if date_from:
|
if date_from:
|
||||||
domains['date_from'] = date_from
|
domains['date_from'] = date_from
|
||||||
if date_to:
|
if date_to:
|
||||||
|
|
|
@ -358,12 +358,11 @@ def domains_explorer_post_filter():
|
||||||
date_from = None
|
date_from = None
|
||||||
date_to = None
|
date_to = None
|
||||||
|
|
||||||
# TODO SEARCH BOTH
|
if domain_onion and domain_regular:
|
||||||
# if domain_onion and domain_regular:
|
if date_from and date_to:
|
||||||
# if date_from and date_to:
|
return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
|
||||||
# return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
|
else:
|
||||||
# else:
|
return redirect(url_for('crawler_splash.domains_explorer_all'))
|
||||||
# return redirect(url_for('crawler_splash.domains_explorer_all'))
|
|
||||||
if domain_regular:
|
if domain_regular:
|
||||||
if date_from and date_to:
|
if date_from and date_to:
|
||||||
return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
|
return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
|
||||||
|
@ -376,22 +375,21 @@ def domains_explorer_post_filter():
|
||||||
return redirect(url_for('crawler_splash.domains_explorer_onion'))
|
return redirect(url_for('crawler_splash.domains_explorer_onion'))
|
||||||
|
|
||||||
|
|
||||||
# TODO TEMP DISABLE
|
@crawler_splash.route('/domains/explorer/all', methods=['GET'])
|
||||||
# @crawler_splash.route('/domains/explorer/all', methods=['GET'])
|
@login_required
|
||||||
# @login_required
|
@login_read_only
|
||||||
# @login_read_only
|
def domains_explorer_all():
|
||||||
# def domains_explorer_all():
|
page = request.args.get('page')
|
||||||
# page = request.args.get('page')
|
date_from = request.args.get('date_from')
|
||||||
# date_from = request.args.get('date_from')
|
date_to = request.args.get('date_to')
|
||||||
# date_to = request.args.get('date_to')
|
try:
|
||||||
# try:
|
page = int(page)
|
||||||
# page = int(page)
|
except:
|
||||||
# except:
|
page = 1
|
||||||
# page = 1
|
|
||||||
#
|
dict_data = Domains.get_domains_up_by_filers(['onion', 'web'], page=page, date_from=date_from, date_to=date_to)
|
||||||
# dict_data = Domain.get_domains_up_by_filers('all', page=page, date_from=date_from, date_to=date_to)
|
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')
|
||||||
# return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')
|
|
||||||
#
|
|
||||||
|
|
||||||
@crawler_splash.route('/domains/explorer/onion', methods=['GET'])
|
@crawler_splash.route('/domains/explorer/onion', methods=['GET'])
|
||||||
@login_required
|
@login_required
|
||||||
|
@ -405,7 +403,7 @@ def domains_explorer_onion():
|
||||||
except:
|
except:
|
||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
dict_data = Domains.get_domains_up_by_filers('onion', page=page, date_from=date_from, date_to=date_to)
|
dict_data = Domains.get_domains_up_by_filers(['onion'], page=page, date_from=date_from, date_to=date_to)
|
||||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
|
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
|
||||||
domain_type='onion')
|
domain_type='onion')
|
||||||
|
|
||||||
|
@ -422,7 +420,7 @@ def domains_explorer_web():
|
||||||
except:
|
except:
|
||||||
page = 1
|
page = 1
|
||||||
|
|
||||||
dict_data = Domains.get_domains_up_by_filers('web', page=page, date_from=date_from, date_to=date_to)
|
dict_data = Domains.get_domains_up_by_filers(['web'], page=page, date_from=date_from, date_to=date_to)
|
||||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
|
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
|
||||||
domain_type='regular')
|
domain_type='regular')
|
||||||
|
|
||||||
|
@ -495,7 +493,7 @@ def domains_search_date():
|
||||||
# page = request.args.get('page')
|
# page = request.args.get('page')
|
||||||
|
|
||||||
date = Date.sanitise_date_range(date_from, date_to)
|
date = Date.sanitise_date_range(date_from, date_to)
|
||||||
domains_date = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], domain_type)
|
domains_date = Domains.get_domains_dates_by_daterange(date['date_from'], date['date_to'], domain_type)
|
||||||
dict_domains = {}
|
dict_domains = {}
|
||||||
for d in domains_date:
|
for d in domains_date:
|
||||||
dict_domains[d] = Domains.get_domains_meta(domains_date[d])
|
dict_domains[d] = Domains.get_domains_meta(domains_date[d])
|
||||||
|
|
|
@ -8,11 +8,11 @@
|
||||||
|
|
||||||
<div class="input-group" id="date-range-from">
|
<div class="input-group" id="date-range-from">
|
||||||
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
||||||
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from }}{% endif %}" name="date_from" autocomplete="off">
|
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from[0:4] }}-{{ date_from[4:6] }}-{{ date_from[6:8] }}{% endif %}" name="date_from" autocomplete="off">
|
||||||
</div>
|
</div>
|
||||||
<div class="input-group" id="date-range-to">
|
<div class="input-group" id="date-range-to">
|
||||||
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
||||||
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to }}{% endif %}" name="date_to" autocomplete="off">
|
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to[0:4] }}-{{ date_to[4:6] }}-{{ date_to[6:8] }}{% endif %}" name="date_to" autocomplete="off">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="col-6">
|
<div class="col-6">
|
||||||
|
|
Loading…
Reference in New Issue