fix: [domain explorer] fix search by daterange

pull/594/head
Terrtia 2023-04-24 10:55:58 +02:00
parent 1ed4875b55
commit 10fbf228c1
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
3 changed files with 45 additions and 33 deletions

View File

@ -478,7 +478,7 @@ def get_all_domains_languages():
def get_domains_by_languages(languages, domain_types):
if len(languages) == 1:
if len(domain_types) == 1:
return r_crawler.smembers(f'language:domains:{domain_type[0]}:{languages[0]}')
return r_crawler.smembers(f'language:domains:{domain_types[0]}:{languages[0]}')
else:
l_keys = []
for domain_type in domain_types:
@ -523,6 +523,15 @@ def get_domains_down_by_date(date, domain_type):
return r_crawler.smembers(f'{domain_type}_down:{date}')
def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
    """Gather the domains recorded for each date between date_from and date_to.

    The dates are those produced by Date.substract_date(date_from, date_to).
    For every date, the "up" domains are appended first (when `up` is set),
    followed by the "down" domains (when `down` is set).

    :param date_from: start of the range, passed to Date.substract_date
    :param date_to: end of the range, passed to Date.substract_date
    :param domain_type: domain type forwarded to the per-date lookups
    :param up: include the domains returned by get_domains_up_by_date
    :param down: include the domains returned by get_domains_down_by_date
    :return: a flat list of domains; no de-duplication is performed, so a
             domain returned for several dates appears once per date
    """
    collected = []
    for day in Date.substract_date(date_from, date_to):
        if up:
            collected += get_domains_up_by_date(day, domain_type)
        if down:
            collected += get_domains_down_by_date(day, domain_type)
    return collected
def get_domains_dates_by_daterange(date_from, date_to, domain_type, up=True, down=False):
date_domains = {}
for date in Date.substract_date(date_from, date_to):
domains = []
@ -541,21 +550,26 @@ def get_domains_meta(domains):
metas.append(dom.get_meta())
return metas
# TODO HANDLE ALL MULTIPLE DOMAIN TYPES
# TODO ADD TAGS FILTER
def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], nb_obj=28, page=1):
def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[], nb_obj=28, page=1):
if not domain_types:
domain_types = ['onion', 'web']
if not tags:
domains = []
if not date_from and not date_to:
domains = sorted(get_domains_up_by_type(domain_type))
for domain_type in domain_types:
domains[0:0] = get_domains_up_by_type(domain_type)
else:
domains = sorted(get_domains_by_daterange(date_from, date_to, domain_type))
for domain_type in domain_types:
domains[0:0] = get_domains_by_daterange(date_from, date_to, domain_type)
domains = sorted(domains)
domains = paginate_iterator(domains, nb_obj=nb_obj, page=page)
meta = []
for dom in domains['list_elem']:
domain = Domain(dom)
meta.append(domain.get_meta(options={'languages', 'screenshot', 'tags_safe'}))
domains['list_elem'] = meta
domains['domain_type'] = domain_type
domains['domain_types'] = domain_types
if date_from:
domains['date_from'] = date_from
if date_to:

View File

@ -358,12 +358,11 @@ def domains_explorer_post_filter():
date_from = None
date_to = None
# TODO SEARCH BOTH
# if domain_onion and domain_regular:
# if date_from and date_to:
# return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
# else:
# return redirect(url_for('crawler_splash.domains_explorer_all'))
if domain_onion and domain_regular:
if date_from and date_to:
return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
else:
return redirect(url_for('crawler_splash.domains_explorer_all'))
if domain_regular:
if date_from and date_to:
return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
@ -376,22 +375,21 @@ def domains_explorer_post_filter():
return redirect(url_for('crawler_splash.domains_explorer_onion'))
# TODO TEMP DISABLE
# @crawler_splash.route('/domains/explorer/all', methods=['GET'])
# @login_required
# @login_read_only
# def domains_explorer_all():
# page = request.args.get('page')
# date_from = request.args.get('date_from')
# date_to = request.args.get('date_to')
# try:
# page = int(page)
# except:
# page = 1
#
# dict_data = Domain.get_domains_up_by_filers('all', page=page, date_from=date_from, date_to=date_to)
# return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')
#
@crawler_splash.route('/domains/explorer/all', methods=['GET'])
@login_required
@login_read_only
def domains_explorer_all():
    """Render the domain explorer page for both 'onion' and 'web' domains.

    Query parameters read from the request:
        page: page number; falls back to 1 when missing or not an integer.
        date_from, date_to: optional range bounds, forwarded unchanged to
            Domains.get_domains_up_by_filers.

    :return: the rendered "domain_explorer.html" template
    """
    page = request.args.get('page')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    try:
        page = int(page)
    except (TypeError, ValueError):
        # TypeError: parameter absent (None); ValueError: non-numeric string.
        # Anything else is a real error and must not be silently swallowed.
        page = 1

    dict_data = Domains.get_domains_up_by_filers(['onion', 'web'], page=page, date_from=date_from, date_to=date_to)
    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
                           domain_type='all')
@crawler_splash.route('/domains/explorer/onion', methods=['GET'])
@login_required
@ -405,7 +403,7 @@ def domains_explorer_onion():
except:
page = 1
dict_data = Domains.get_domains_up_by_filers('onion', page=page, date_from=date_from, date_to=date_to)
dict_data = Domains.get_domains_up_by_filers(['onion'], page=page, date_from=date_from, date_to=date_to)
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
domain_type='onion')
@ -422,7 +420,7 @@ def domains_explorer_web():
except:
page = 1
dict_data = Domains.get_domains_up_by_filers('web', page=page, date_from=date_from, date_to=date_to)
dict_data = Domains.get_domains_up_by_filers(['web'], page=page, date_from=date_from, date_to=date_to)
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
domain_type='regular')
@ -495,7 +493,7 @@ def domains_search_date():
# page = request.args.get('page')
date = Date.sanitise_date_range(date_from, date_to)
domains_date = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], domain_type)
domains_date = Domains.get_domains_dates_by_daterange(date['date_from'], date['date_to'], domain_type)
dict_domains = {}
for d in domains_date:
dict_domains[d] = Domains.get_domains_meta(domains_date[d])

View File

@ -8,11 +8,11 @@
<div class="input-group" id="date-range-from">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from }}{% endif %}" name="date_from" autocomplete="off">
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{% if date_from %}{{ date_from[0:4] }}-{{ date_from[4:6] }}-{{ date_from[6:8] }}{% endif %}" name="date_from" autocomplete="off">
</div>
<div class="input-group" id="date-range-to">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to }}{% endif %}" name="date_to" autocomplete="off">
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{% if date_to %}{{ date_to[0:4] }}-{{ date_to[4:6] }}-{{ date_to[6:8] }}{% endif %}" name="date_to" autocomplete="off">
</div>
</div>
<div class="col-6">