mirror of https://github.com/CIRCL/AIL-framework
fix: [domain explorer] fix search by daterange
parent
1ed4875b55
commit
10fbf228c1
|
@ -478,7 +478,7 @@ def get_all_domains_languages():
|
|||
def get_domains_by_languages(languages, domain_types):
|
||||
if len(languages) == 1:
|
||||
if len(domain_types) == 1:
|
||||
return r_crawler.smembers(f'language:domains:{domain_type[0]}:{languages[0]}')
|
||||
return r_crawler.smembers(f'language:domains:{domain_types[0]}:{languages[0]}')
|
||||
else:
|
||||
l_keys = []
|
||||
for domain_type in domain_types:
|
||||
|
@ -523,6 +523,15 @@ def get_domains_down_by_date(date, domain_type):
|
|||
return r_crawler.smembers(f'{domain_type}_down:{date}')
|
||||
|
||||
def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
    """Return the domains recorded for each date between two bounds.

    The dates to visit are whatever ``Date.substract_date(date_from, date_to)``
    yields. For each of them the "up" and/or "down" domains of ``domain_type``
    are appended to a single flat list.

    :param date_from: start of the range, passed to ``Date.substract_date``
    :param date_to: end of the range, passed to ``Date.substract_date``
    :param domain_type: domain type forwarded to the per-date lookups
    :param up: include the result of ``get_domains_up_by_date``
    :param down: include the result of ``get_domains_down_by_date``
    :return: list of domains; results are concatenated as-is, so a domain
        returned for several dates appears several times
    """
    # Select the per-date lookups once; "up" is queried before "down".
    fetchers = []
    if up:
        fetchers.append(get_domains_up_by_date)
    if down:
        fetchers.append(get_domains_down_by_date)

    collected = []
    for day in Date.substract_date(date_from, date_to):
        for fetch in fetchers:
            collected.extend(fetch(day, domain_type))
    return collected
|
||||
|
||||
def get_domains_dates_by_daterange(date_from, date_to, domain_type, up=True, down=False):
|
||||
date_domains = {}
|
||||
for date in Date.substract_date(date_from, date_to):
|
||||
domains = []
|
||||
|
@ -541,21 +550,26 @@ def get_domains_meta(domains):
|
|||
metas.append(dom.get_meta())
|
||||
return metas
|
||||
|
||||
# TODO HANDLE ALL MULTIPLE DOMAIN TYPES
|
||||
# TODO ADD TAGS FILTER
|
||||
def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], nb_obj=28, page=1):
|
||||
def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[], nb_obj=28, page=1):
|
||||
if not domain_types:
|
||||
domain_types = ['onion', 'web']
|
||||
if not tags:
|
||||
domains = []
|
||||
if not date_from and not date_to:
|
||||
domains = sorted(get_domains_up_by_type(domain_type))
|
||||
for domain_type in domain_types:
|
||||
domains[0:0] = get_domains_up_by_type(domain_type)
|
||||
else:
|
||||
domains = sorted(get_domains_by_daterange(date_from, date_to, domain_type))
|
||||
for domain_type in domain_types:
|
||||
domains[0:0] = get_domains_by_daterange(date_from, date_to, domain_type)
|
||||
domains = sorted(domains)
|
||||
domains = paginate_iterator(domains, nb_obj=nb_obj, page=page)
|
||||
meta = []
|
||||
for dom in domains['list_elem']:
|
||||
domain = Domain(dom)
|
||||
meta.append(domain.get_meta(options={'languages', 'screenshot', 'tags_safe'}))
|
||||
domains['list_elem'] = meta
|
||||
domains['domain_type'] = domain_type
|
||||
domains['domain_types'] = domain_types
|
||||
if date_from:
|
||||
domains['date_from'] = date_from
|
||||
if date_to:
|
||||
|
|
|
@ -358,12 +358,11 @@ def domains_explorer_post_filter():
|
|||
date_from = None
|
||||
date_to = None
|
||||
|
||||
# TODO SEARCH BOTH
|
||||
# if domain_onion and domain_regular:
|
||||
# if date_from and date_to:
|
||||
# return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
|
||||
# else:
|
||||
# return redirect(url_for('crawler_splash.domains_explorer_all'))
|
||||
if domain_onion and domain_regular:
|
||||
if date_from and date_to:
|
||||
return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
|
||||
else:
|
||||
return redirect(url_for('crawler_splash.domains_explorer_all'))
|
||||
if domain_regular:
|
||||
if date_from and date_to:
|
||||
return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
|
||||
|
@ -376,22 +375,21 @@ def domains_explorer_post_filter():
|
|||
return redirect(url_for('crawler_splash.domains_explorer_onion'))
|
||||
|
||||
|
||||
# TODO TEMP DISABLE
|
||||
# @crawler_splash.route('/domains/explorer/all', methods=['GET'])
|
||||
# @login_required
|
||||
# @login_read_only
|
||||
# def domains_explorer_all():
|
||||
# page = request.args.get('page')
|
||||
# date_from = request.args.get('date_from')
|
||||
# date_to = request.args.get('date_to')
|
||||
# try:
|
||||
# page = int(page)
|
||||
# except:
|
||||
# page = 1
|
||||
#
|
||||
# dict_data = Domain.get_domains_up_by_filers('all', page=page, date_from=date_from, date_to=date_to)
|
||||
# return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')
|
||||
#
|
||||
@crawler_splash.route('/domains/explorer/all', methods=['GET'])
@login_required
@login_read_only
def domains_explorer_all():
    """Render the domain explorer for both onion and web domains.

    Query arguments read from the request:
      - ``page``: page number; falls back to 1 when missing or not an integer
      - ``date_from`` / ``date_to``: optional date range, forwarded unchanged

    The lookup is delegated to ``Domains.get_domains_up_by_filers`` with the
    domain types ``['onion', 'web']`` and the result is rendered with the
    ``domain_explorer.html`` template.
    """
    page = request.args.get('page')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    try:
        page = int(page)
    # int() raises TypeError for a missing (None) value and ValueError for a
    # non-numeric string; anything else should not be silenced here.
    except (TypeError, ValueError):
        page = 1

    dict_data = Domains.get_domains_up_by_filers(['onion', 'web'], page=page, date_from=date_from, date_to=date_to)
    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')
|
||||
|
||||
|
||||
@crawler_splash.route('/domains/explorer/onion', methods=['GET'])
|
||||
@login_required
|
||||
|
@ -405,7 +403,7 @@ def domains_explorer_onion():
|
|||
except:
|
||||
page = 1
|
||||
|
||||
dict_data = Domains.get_domains_up_by_filers('onion', page=page, date_from=date_from, date_to=date_to)
|
||||
dict_data = Domains.get_domains_up_by_filers(['onion'], page=page, date_from=date_from, date_to=date_to)
|
||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
|
||||
domain_type='onion')
|
||||
|
||||
|
@ -422,7 +420,7 @@ def domains_explorer_web():
|
|||
except:
|
||||
page = 1
|
||||
|
||||
dict_data = Domains.get_domains_up_by_filers('web', page=page, date_from=date_from, date_to=date_to)
|
||||
dict_data = Domains.get_domains_up_by_filers(['web'], page=page, date_from=date_from, date_to=date_to)
|
||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
|
||||
domain_type='regular')
|
||||
|
||||
|
@ -495,7 +493,7 @@ def domains_search_date():
|
|||
# page = request.args.get('page')
|
||||
|
||||
date = Date.sanitise_date_range(date_from, date_to)
|
||||
domains_date = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], domain_type)
|
||||
domains_date = Domains.get_domains_dates_by_daterange(date['date_from'], date['date_to'], domain_type)
|
||||
dict_domains = {}
|
||||
for d in domains_date:
|
||||
dict_domains[d] = Domains.get_domains_meta(domains_date[d])
|
||||
|
|
|
@ -8,11 +8,11 @@
|
|||
|
||||
<div class="input-group" id="date-range-from">
|
||||
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
||||
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from }}{% endif %}" name="date_from" autocomplete="off">
|
||||
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from[0:4] }}-{{ date_from[4:6] }}-{{ date_from[6:8] }}{% endif %}" name="date_from" autocomplete="off">
|
||||
</div>
|
||||
<div class="input-group" id="date-range-to">
|
||||
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
||||
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to }}{% endif %}" name="date_to" autocomplete="off">
|
||||
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to[0:4] }}-{{ date_to[4:6] }}-{{ date_to[6:8] }}{% endif %}" name="date_to" autocomplete="off">
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-6">
|
||||
|
|
Loading…
Reference in New Issue