#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Flask Blueprint: crawler splash endpoints: dashboard, onion crawler, domain
explorer, cookiejar management, Lacus crawler settings, ...
"""

import os
import json
import random
import sys
import time
from datetime import datetime

from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, send_file, abort
from flask_login import login_required, current_user, login_user, logout_user

sys.path.append('modules')
import Flask_config

# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import crawlers
from lib import Language
from lib.objects import Domains
from lib.objects.Items import Item
from lib import Tag

from packages import Date

# import Config_DB

bootstrap_label = Flask_config.bootstrap_label

# ============ BLUEPRINT ============
crawler_splash = Blueprint('crawler_splash', __name__,
                           template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/crawler/crawler_splash'))

# ============ VARIABLES ============

# ============ FUNCTIONS ============

def api_validator(message, code):
    if message and code:
        return Response(json.dumps(message, indent=2, sort_keys=True), mimetype='application/json'), code


def create_json_response(data, status_code):
    return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code


# ============= ROUTES ==============

@crawler_splash.route("/crawlers/dashboard", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_read_only
|
|
|
|
def crawlers_dashboard():
|
2022-10-25 16:25:19 +02:00
|
|
|
is_manager_connected = crawlers.get_lacus_connection_metadata()
|
2023-02-21 12:22:49 +01:00
|
|
|
crawlers_status = crawlers.get_captures_status()
|
2022-10-25 16:25:19 +02:00
|
|
|
crawlers_latest_stats = crawlers.get_crawlers_stats()
|
2023-04-21 10:26:14 +02:00
|
|
|
# print(crawlers_status)
|
2023-03-14 17:36:42 +01:00
|
|
|
# print(crawlers_latest_stats)
|
2020-08-17 21:52:57 +02:00
|
|
|
date = crawlers.get_current_date()
|
2022-10-25 16:25:19 +02:00
|
|
|
return render_template("dashboard_crawler.html", date=date,
|
|
|
|
is_manager_connected=is_manager_connected,
|
|
|
|
crawlers_status=crawlers_status,
|
2023-04-24 13:35:55 +02:00
|
|
|
filter_up=True,
|
2022-10-25 16:25:19 +02:00
|
|
|
crawlers_latest_stats=crawlers_latest_stats)
|
2020-08-17 21:52:57 +02:00
|
|
|
|
2023-02-17 14:50:20 +01:00
|
|
|
|
2020-08-17 21:52:57 +02:00
|
|
|
@crawler_splash.route("/crawlers/crawler_dashboard_json", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_read_only
|
|
|
|
def crawler_dashboard_json():
|
2023-02-21 12:22:49 +01:00
|
|
|
crawlers_status = crawlers.get_captures_status()
|
2022-10-25 16:25:19 +02:00
|
|
|
crawlers_latest_stats = crawlers.get_crawlers_stats()
|
2023-03-14 17:36:42 +01:00
|
|
|
# print(crawlers_status)
|
2020-08-17 21:52:57 +02:00
|
|
|
|
2022-10-25 16:25:19 +02:00
|
|
|
return jsonify({'crawlers_status': crawlers_status,
|
|
|
|
'stats': crawlers_latest_stats})
|
2020-08-17 21:52:57 +02:00
|
|
|
|
2023-12-12 10:35:33 +01:00
|
|
|
@crawler_splash.route("/crawlers/dashboard/captures/delete", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_admin
|
2023-12-12 10:36:42 +01:00
|
|
|
def crawlers_dashboard_captures_delete():
|
2023-12-12 10:35:33 +01:00
|
|
|
crawlers.delete_captures()
|
|
|
|
return redirect(url_for('crawler_splash.crawlers_dashboard'))
|
|
|
|
|
2023-02-17 14:50:20 +01:00
|
|
|
|
2020-03-24 17:15:43 +01:00
|
|
|
@crawler_splash.route("/crawlers/manual", methods=['GET'])
|
2020-03-30 18:43:50 +02:00
|
|
|
@login_required
|
|
|
|
@login_read_only
|
2020-03-24 17:15:43 +01:00
|
|
|
def manual():
|
|
|
|
user_id = current_user.get_id()
|
2023-02-17 14:50:20 +01:00
|
|
|
l_cookiejar = crawlers.api_get_cookiejars_selector(user_id)
|
2022-10-25 16:25:19 +02:00
|
|
|
crawlers_types = crawlers.get_crawler_all_types()
|
|
|
|
proxies = [] # TODO HANDLE PROXIES
|
2020-08-24 22:31:41 +02:00
|
|
|
return render_template("crawler_manual.html",
|
2023-02-17 14:50:20 +01:00
|
|
|
is_manager_connected=crawlers.get_lacus_connection_metadata(),
|
|
|
|
crawlers_types=crawlers_types,
|
|
|
|
proxies=proxies,
|
2023-06-20 08:11:44 +02:00
|
|
|
l_cookiejar=l_cookiejar,
|
|
|
|
tags_selector_data=Tag.get_tags_selector_data())
|
2023-02-17 14:50:20 +01:00
|
|
|
|
2020-03-24 17:15:43 +01:00
|
|
|
|
2020-03-30 18:43:50 +02:00
|
|
|
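# Handles the manual crawl submission (presumably the form rendered by crawler_manual.html above);
# the form field names read below are 'url_to_crawl', 'crawler_queue_type', 'screenshot', 'har',
# 'depth_limit', 'cookiejar', plus the tag, scheduler frequency and proxy fields.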
@crawler_splash.route("/crawlers/send_to_spider", methods=['POST'])
|
|
|
|
@login_required
|
|
|
|
@login_analyst
|
|
|
|
def send_to_spider():
|
|
|
|
user_id = current_user.get_id()
|
|
|
|
|
|
|
|
# POST val
|
|
|
|
url = request.form.get('url_to_crawl')
|
2020-08-24 22:31:41 +02:00
|
|
|
crawler_type = request.form.get('crawler_queue_type')
|
2020-03-30 18:43:50 +02:00
|
|
|
screenshot = request.form.get('screenshot')
|
|
|
|
har = request.form.get('har')
|
|
|
|
depth_limit = request.form.get('depth_limit')
|
|
|
|
cookiejar_uuid = request.form.get('cookiejar')
|
|
|
|
|
2023-06-20 08:11:44 +02:00
|
|
|
# TAGS
|
|
|
|
tags = request.form.get("tags", [])
|
|
|
|
taxonomies_tags = request.form.get('taxonomies_tags')
|
|
|
|
if taxonomies_tags:
|
|
|
|
try:
|
|
|
|
taxonomies_tags = json.loads(taxonomies_tags)
|
|
|
|
except:
|
|
|
|
taxonomies_tags = []
|
|
|
|
else:
|
|
|
|
taxonomies_tags = []
|
|
|
|
galaxies_tags = request.form.get('galaxies_tags')
|
|
|
|
if galaxies_tags:
|
|
|
|
try:
|
|
|
|
galaxies_tags = json.loads(galaxies_tags)
|
|
|
|
except:
|
|
|
|
galaxies_tags = []
|
|
|
|
else:
|
|
|
|
galaxies_tags = []
|
|
|
|
# custom tags
|
|
|
|
if tags:
|
|
|
|
tags = tags.split()
|
|
|
|
else:
|
|
|
|
tags = []
|
|
|
|
escaped = []
|
|
|
|
for tag in tags:
|
|
|
|
escaped.append(tag)
|
|
|
|
tags = escaped + taxonomies_tags + galaxies_tags
|
|
|
|
|
2023-03-14 17:36:42 +01:00
|
|
|
    # Frequency
    if request.form.get('crawler_scheduler'):
        frequency = request.form.get('frequency')
        if frequency == 'custom':
            months = request.form.get('frequency_months', 0)
            weeks = request.form.get('frequency_weeks', 0)
            days = request.form.get('frequency_days', 0)
            hours = request.form.get('frequency_hours', 0)
            minutes = request.form.get('frequency_minutes', 0)
            frequency = {'months': months, 'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes}
    else:
        frequency = None

    # PROXY
    proxy = request.form.get('proxy_name')
    if proxy:
        res = crawlers.api_verify_proxy(proxy)
        if res[1] != 200:
            return create_json_response(res[0], res[1])
    elif crawler_type == 'onion':
        proxy = 'force_tor'

    if cookiejar_uuid:
        if cookiejar_uuid == 'None':
            cookiejar_uuid = None
        else:
            cookiejar_uuid = cookiejar_uuid.rsplit(':')
            cookiejar_uuid = cookiejar_uuid[-1].replace(' ', '')

    data = {'url': url, 'depth': depth_limit, 'har': har, 'screenshot': screenshot, 'frequency': frequency}
    if proxy:
        data['proxy'] = proxy
    if cookiejar_uuid:
        data['cookiejar'] = cookiejar_uuid
    if tags:
        data['tags'] = tags
    # print(data)
    res = crawlers.api_add_crawler_task(data, user_id=user_id)

    if res[1] != 200:
        return create_json_response(res[0], res[1])
    return redirect(url_for('crawler_splash.manual'))

@crawler_splash.route("/crawlers/scheduler", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_read_only
|
|
|
|
def scheduler_dashboard():
|
|
|
|
schedulers = crawlers.get_schedulers_metas()
|
|
|
|
# print(schedulers)
|
|
|
|
# TODO list currently queued ?
|
|
|
|
return render_template("crawler_scheduler_dashboard.html",
|
2023-06-20 08:11:44 +02:00
|
|
|
bootstrap_label=bootstrap_label,
|
2023-03-14 17:36:42 +01:00
|
|
|
schedulers=schedulers,
|
|
|
|
is_manager_connected=crawlers.get_lacus_connection_metadata())
|
|
|
|
|
|
|
|
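# Illustrative request (uuid value is a placeholder):
#   GET /crawlers/schedule?uuid=<schedule_uuid>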
@crawler_splash.route("/crawlers/schedule", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_read_only
|
|
|
|
def schedule_show():
|
|
|
|
schedule_uuid = request.args.get('uuid')
|
|
|
|
schedule = crawlers.CrawlerSchedule(schedule_uuid)
|
|
|
|
if not schedule.exists():
|
|
|
|
abort(404)
|
|
|
|
meta = schedule.get_meta(ui=True)
|
|
|
|
return render_template("crawler_schedule_uuid.html",
|
2023-06-20 08:11:44 +02:00
|
|
|
bootstrap_label=bootstrap_label,
|
2023-03-14 17:36:42 +01:00
|
|
|
meta=meta)
|
|
|
|
|
|
|
|
@crawler_splash.route("/crawlers/schedule/delete", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_analyst
|
|
|
|
def schedule_delete():
|
|
|
|
schedule_uuid = request.args.get('uuid')
|
|
|
|
schedule = crawlers.CrawlerSchedule(schedule_uuid)
|
|
|
|
if not schedule.exists():
|
|
|
|
abort(404)
|
|
|
|
res = crawlers.api_delete_schedule({'uuid': schedule_uuid})
|
|
|
|
if res[1] != 200:
|
|
|
|
return create_json_response(res[0], res[1])
|
|
|
|
return redirect(url_for('crawler_splash.scheduler_dashboard'))
|
|
|
|
|
2023-03-31 09:25:06 +02:00
|
|
|
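# Illustrative request (domain value is a placeholder):
#   GET /crawlers/blacklist?domain=<domain>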
@crawler_splash.route("/crawlers/blacklist", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_analyst
|
|
|
|
def crawler_blacklist():
|
|
|
|
domain = request.args.get('domain')
|
|
|
|
if domain:
|
|
|
|
res = crawlers.api_blacklist_domain({'domain': domain})
|
|
|
|
if res[1] != 200:
|
|
|
|
if res[0].get('error') == 'domain already blacklisted':
|
|
|
|
error_code = 2
|
|
|
|
else:
|
|
|
|
error_code = 1
|
|
|
|
else:
|
|
|
|
error_code = 0
|
|
|
|
domain = None
|
|
|
|
else:
|
|
|
|
domain = None
|
|
|
|
error_code = None
|
|
|
|
blacklist = crawlers.get_blacklist()
|
|
|
|
return render_template("crawler_blacklist.html", blacklist=blacklist,
|
|
|
|
domain=domain, error_code=error_code,
|
|
|
|
is_manager_connected=crawlers.get_lacus_connection_metadata())
|
|
|
|
|
|
|
|
@crawler_splash.route("/crawlers/blacklist/delete", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_analyst
|
|
|
|
def crawler_blacklist_delete():
|
|
|
|
domain = request.args.get('domain')
|
|
|
|
res = crawlers.api_unblacklist_domain({'domain': domain})
|
|
|
|
if res[1] != 200:
|
|
|
|
return create_json_response(res[0], res[1])
|
|
|
|
return redirect(url_for('crawler_splash.crawler_blacklist'))
|
|
|
|
|
2023-03-14 17:36:42 +01:00
|
|
|
|
2022-10-25 16:25:19 +02:00
|
|
|
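# Illustrative request ('type' must be one of crawlers.get_crawler_all_types(), e.g. onion):
#   GET /crawlers/last/domains?type=onion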
@crawler_splash.route("/crawlers/last/domains", methods=['GET'])
|
|
|
|
@login_required
|
|
|
|
@login_read_only
|
|
|
|
def crawlers_last_domains():
|
|
|
|
domain_type = request.args.get('type')
|
|
|
|
if domain_type not in crawlers.get_crawler_all_types():
|
|
|
|
return jsonify({'error': 'Invalid domain type'}), 400
|
|
|
|
|
|
|
|
# TODO STAT by EPOCH
|
|
|
|
domains = []
|
|
|
|
for domain_row in crawlers.get_last_crawled_domains(domain_type):
|
|
|
|
domain, epoch = domain_row.split(':', 1)
|
|
|
|
dom = Domains.Domain(domain)
|
|
|
|
meta = dom.get_meta()
|
2023-08-01 14:30:36 +02:00
|
|
|
meta['last'] = datetime.fromtimestamp(int(epoch)).strftime("%Y/%m/%d %H:%M.%S")
|
2022-10-25 16:25:19 +02:00
|
|
|
meta['epoch'] = epoch
|
|
|
|
meta['status_epoch'] = dom.is_up_by_epoch(epoch)
|
|
|
|
domains.append(meta)
|
|
|
|
crawler_stats = crawlers.get_crawlers_stats(domain_type=domain_type)
|
|
|
|
|
|
|
|
now = datetime.now()
|
|
|
|
date = now.strftime("%Y%m%d")
|
|
|
|
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
|
|
|
|
return render_template("last_crawled.html", domains=domains, type=domain_type,
|
|
|
|
is_manager_connected=crawlers.get_lacus_connection_metadata(),
|
|
|
|
date_from=date_string, date_to=date_string,
|
|
|
|
crawler_stats=crawler_stats)
|
|
|
|
|
2023-02-17 14:50:20 +01:00
|
|
|
|
2022-10-25 16:25:19 +02:00
|
|
|
@crawler_splash.route('/crawlers/last/domains/json')
@login_required
@login_read_only
def crawlers_last_domains_json():
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    stats = []
    for date in Date.get_date_range(7):
        stats.append(crawlers.get_crawlers_stats_by_day(date, domain_type))
    return jsonify(stats)


@crawler_splash.route('/crawlers/last/domains/month/json')
@login_required
@login_read_only
def crawlers_last_domains_month_json():
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    stats = crawlers.get_crawlers_stats_by_month(domain_type)
    return jsonify(stats)


@crawler_splash.route('/crawlers/last/domains/month/previous/json')
@login_required
@login_read_only
def crawlers_last_domains_previous_month_json():
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    date = Date.get_previous_month_date()
    stats = crawlers.get_crawlers_stats_by_month(domain_type, date=date)
    return jsonify(stats)


@crawler_splash.route('/crawlers/last/domains/status/month/json')
@login_required
@login_read_only
def crawlers_last_domains_status_month_json():
    domain_type = request.args.get('type')
    if domain_type not in crawlers.get_crawler_all_types():
        return jsonify({'error': 'Invalid domain type'}), 400
    stats = crawlers.get_crawlers_stats_up_down_by_month(domain_type)
    data = []
    for key in stats:
        data.append({'name': key, 'value': stats[key]})
    return jsonify(data)


#### Domains ####

# add route : /crawlers/show_domain
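# Illustrative request ('epoch' is optional, values are placeholders):
#   GET /crawlers/showDomain?domain=<domain>&epoch=<epoch>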
@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
@login_required
@login_read_only
def showDomain():
    if request.method == 'POST':
        domain_name = request.form.get('in_show_domain')
        epoch = None
    else:
        domain_name = request.args.get('domain')
        epoch = request.args.get('epoch')
        try:
            epoch = int(epoch)
        except (ValueError, TypeError):
            epoch = None

    domain = Domains.Domain(domain_name)
    if not domain.exists():
        abort(404)

    dict_domain = domain.get_meta(options=['last_origin', 'languages'])
    dict_domain['domain'] = domain.id
    if domain.was_up():
        dict_domain = {**dict_domain, **domain.get_correlations(unpack=True)}
        dict_domain['correlation_nb'] = len(dict_domain['decoded']) + len(dict_domain['username']) + len(
            dict_domain['pgp']) + len(dict_domain['cryptocurrency']) + len(dict_domain['screenshot'])
    dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags'])
    dict_domain['history'] = domain.get_history(status=True)
    curr_epoch = None
    # Select valid epoch
    if epoch:
        for row in dict_domain['history']:
            if row['epoch'] == epoch:
                curr_epoch = row['epoch']
                break
    else:
        curr_epoch = -1
        for row in dict_domain['history']:
            if row['epoch'] > curr_epoch:
                curr_epoch = row['epoch']
    dict_domain['epoch'] = curr_epoch
    dict_domain["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(curr_epoch))

    # print(dict_domain['epoch'])

    dict_domain['crawler_history_items'] = []
    for item_id in domain.get_crawled_items_by_epoch(epoch):
        dict_domain['crawler_history_items'].append(Item(item_id).get_meta(options={'crawler'}))
    if dict_domain['crawler_history_items']:
        dict_domain['random_item'] = random.choice(dict_domain['crawler_history_items'])

    return render_template("showDomain.html",
                           dict_domain=dict_domain, bootstrap_label=bootstrap_label,
                           modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))

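# Illustrative request ('epoch' is optional, values are placeholders):
#   GET /crawlers/domain/download?domain=<domain>&epoch=<epoch>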
@crawler_splash.route('/crawlers/domain/download', methods=['GET'])
@login_required
@login_read_only
def crawlers_domain_download():
    domain = request.args.get('domain')
    epoch = request.args.get('epoch')
    try:
        epoch = int(epoch)
    except (ValueError, TypeError):
        epoch = None
    dom = Domains.Domain(domain)
    if not dom.exists():
        abort(404)
    zip_file = dom.get_download_zip(epoch=epoch)
    if not zip_file:
        abort(404)
    return send_file(zip_file, download_name=f'{dom.get_id()}.zip', as_attachment=True)


@crawler_splash.route('/domains/explorer/domain_type_post', methods=['POST'])
@login_required
@login_read_only
def domains_explorer_post_filter():
    domain_onion = request.form.get('domain_onion_switch')
    domain_regular = request.form.get('domain_regular_switch')
    date_from = request.form.get('date_from')
    date_to = request.form.get('date_to')

    if date_from and date_to:
        date_from = date_from.replace('-', '')
        date_to = date_to.replace('-', '')
    else:
        date_from = None
        date_to = None

    if domain_onion and domain_regular:
        if date_from and date_to:
            return redirect(url_for('crawler_splash.domains_explorer_all', date_from=date_from, date_to=date_to))
        else:
            return redirect(url_for('crawler_splash.domains_explorer_all'))
    if domain_regular:
        if date_from and date_to:
            return redirect(url_for('crawler_splash.domains_explorer_web', date_from=date_from, date_to=date_to))
        else:
            return redirect(url_for('crawler_splash.domains_explorer_web'))
    else:
        if date_from and date_to:
            return redirect(url_for('crawler_splash.domains_explorer_onion', date_from=date_from, date_to=date_to))
        else:
            return redirect(url_for('crawler_splash.domains_explorer_onion'))

@crawler_splash.route('/domains/explorer/all', methods=['GET'])
@login_required
@login_read_only
def domains_explorer_all():
    page = request.args.get('page')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    try:
        page = int(page)
    except (ValueError, TypeError):
        page = 1

    dict_data = Domains.get_domains_up_by_filers(['onion', 'web'], page=page, date_from=date_from, date_to=date_to)
    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='all')


@crawler_splash.route('/domains/explorer/onion', methods=['GET'])
@login_required
@login_read_only
def domains_explorer_onion():
    page = request.args.get('page')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    try:
        page = int(page)
    except (ValueError, TypeError):
        page = 1

    dict_data = Domains.get_domains_up_by_filers(['onion'], page=page, date_from=date_from, date_to=date_to)
    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
                           domain_type='onion')


@crawler_splash.route('/domains/explorer/web', methods=['GET'])
@login_required
@login_read_only
def domains_explorer_web():
    page = request.args.get('page')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    try:
        page = int(page)
    except (ValueError, TypeError):
        page = 1

    dict_data = Domains.get_domains_up_by_filers(['web'], page=page, date_from=date_from, date_to=date_to)
    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label,
                           domain_type='regular')

@crawler_splash.route('/domains/languages/all/json', methods=['GET'])
@login_required
@login_read_only
def domains_all_languages_json():
    # # TODO: get domain type
    iso = request.args.get('iso')
    domain_types = request.args.getlist('domain_types')
    return jsonify(Language.get_languages_from_iso(Domains.get_all_domains_languages(), sort=True))

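# Illustrative request (comma-separated values, placeholders shown):
#   GET /domains/languages/search_get?languages=<lang1>,<lang2>&domain_types=<type>&page=1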
@crawler_splash.route('/domains/languages/search_get', methods=['GET'])
@login_required
@login_read_only
def domains_search_languages_get():
    page = request.args.get('page')
    try:
        page = int(page)
    except (ValueError, TypeError):
        page = 1

    domains_types = request.args.getlist('domain_types')
    if domains_types:
        domains_types = domains_types[0].split(',')
    domains_types = Domains.sanitize_domains_types(domains_types)

    languages = request.args.getlist('languages')
    if languages:
        languages = languages[0].split(',')
    l_dict_domains = Domains.api_get_domains_by_languages(domains_types, Language.get_iso_from_languages(languages),
                                                          meta=True, page=page)
    return render_template("domains/domains_filter_languages.html", template_folder='../../',
                           l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
                           current_languages=languages, domains_types=domains_types)

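# Illustrative request ('name' is mandatory, other values are placeholders):
#   GET /domains/name/search?name=<substring>&domain_types=<type>&page=1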
@crawler_splash.route('/domains/name/search', methods=['GET'])
@login_required
@login_analyst
def domains_search_name():
    name = request.args.get('name')
    page = request.args.get('page')
    try:
        page = int(page)
    except (ValueError, TypeError):
        page = 1

    if not name:
        return create_json_response({'error': 'Mandatory args name not provided'}, 400)

    domains_types = request.args.getlist('domain_types')
    if domains_types:
        domains_types = domains_types[0].split(',')
    domains_types = Domains.sanitize_domains_types(domains_types)

    l_dict_domains = Domains.api_search_domains_by_name(name, domains_types, meta=True, page=page)
    return render_template("domains/domains_result_list.html", template_folder='../../',
                           l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
                           domains_types=domains_types)

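# Illustrative request (dates are YYYYMMDD; 'up'/'down' count as enabled when non-empty):
#   GET /domains/date?type=onion&date_from=<YYYYMMDD>&date_to=<YYYYMMDD>&up=True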
@crawler_splash.route('/domains/date', methods=['GET'])
@login_required
@login_analyst
def domains_search_date():
    # TODO sanitize type + date
    dom_types = request.args.get('type')
    date_from = request.args.get('date_from')
    date_to = request.args.get('date_to')
    down = bool(request.args.get('down', False))
    up = bool(request.args.get('up'))
    # page = request.args.get('page')

    all_types = Domains.get_all_domains_types()
    if dom_types == 'all':
        domain_types = all_types
    elif dom_types in Domains.get_all_domains_types():
        domain_types = [dom_types]
    else:
        dom_types = 'all'
        domain_types = all_types

    date = Date.sanitise_date_range(date_from, date_to)
    domains_date = Domains.get_domains_dates_by_daterange(date['date_from'], date['date_to'], domain_types,
                                                          up=up, down=down)
    dict_domains = {}
    for d in domains_date:
        dict_domains[d] = Domains.get_domains_meta(domains_date[d])
    date_from = f"{date['date_from'][0:4]}-{date['date_from'][4:6]}-{date['date_from'][6:8]}"
    date_to = f"{date['date_to'][0:4]}-{date['date_to'][4:6]}-{date['date_to'][6:8]}"

    return render_template("domains_daterange.html", date_from=date_from, date_to=date_to,
                           bootstrap_label=bootstrap_label,
                           filter_down=down, filter_up=up,
                           dict_domains=dict_domains, type=dom_types)

@crawler_splash.route('/domains/date/post', methods=['POST'])
@login_required
@login_analyst
def domains_search_date_post():
    domain_type = request.form.get('type')
    date_from = request.form.get('date_from')
    date_to = request.form.get('date_to')
    down = request.form.get('down')
    up = request.form.get('up')
    return redirect(url_for('crawler_splash.domains_search_date', date_from=date_from, date_to=date_to,
                            type=domain_type, down=down, up=up))


@crawler_splash.route('/domains/explorer/vanity', methods=['GET'])
@login_required
@login_analyst
def domains_explorer_vanity_clusters():
    nb_min = request.args.get('min', 4)
    if int(nb_min) < 0:
        nb_min = 4
    vanity_clusters = Domains.get_vanity_clusters(nb_min=nb_min)
    return render_template("explorer_vanity_clusters.html", vanity_clusters=vanity_clusters,
                           length=4)

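# Illustrative request (values are placeholders):
#   GET /domains/explorer/vanity/explore?vanity=<prefix>&min=2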
@crawler_splash.route('/domains/explorer/vanity/explore', methods=['GET'])
@login_required
@login_analyst
def domains_explorer_vanity_explore():
    vanity = request.args.get('vanity')
    nb_min = request.args.get('min', 2)  # TODO SHOW DOMAINS OPTIONS + HARD CODED DOMAINS LIMIT FOR RENDER
    length = len(vanity)
    if int(nb_min) < 0:
        nb_min = 4
    vanity_clusters = Domains.get_vanity_cluster(vanity, len_vanity=length+1, nb_min=nb_min)
    vanity_domains = Domains.get_vanity_domains(vanity, len_vanity=length, meta=True)
    vanities_tree = []
    for i in range(4, length):
        vanities_tree.append(vanity[:i])
    if length == len(vanity):
        vanities_tree.append(vanity)
    return render_template("explorer_vanity_domains.html", vanity_clusters=vanity_clusters,
                           bootstrap_label=bootstrap_label, vanity=vanity, vanities_tree=vanities_tree,
                           vanity_domains=vanity_domains, length=length)


##-- --##


## Cookiejar ##
@crawler_splash.route('/crawler/cookiejar/add', methods=['GET'])
@login_required
@login_analyst
def crawler_cookiejar_add():
    return render_template("add_cookiejar.html")

@crawler_splash.route('/crawler/cookiejar/add_post', methods=['POST'])
@login_required
@login_analyst
def crawler_cookiejar_add_post():
    user_id = current_user.get_id()

    description = request.form.get('description')
    level = request.form.get('level')
    if level:
        level = 1
    else:
        level = 0

    if 'file' in request.files:
        file = request.files['file']
        json_cookies = file.read().decode()
    else:
        json_cookies = None

    # Get cookies to add
    l_manual_cookie = []
    l_invalid_cookie = []
    for obj_tuple in list(request.form):
        l_input = request.form.getlist(obj_tuple)
        if len(l_input) == 2:
            if l_input[0]:  # Cookie Name
                cookie_dict = {'name': l_input[0], 'value': l_input[1]}
                l_manual_cookie.append(cookie_dict)
            elif l_input[1]:  # Cookie Value
                l_invalid_cookie.append({'name': '', 'value': l_input[1]})
    if l_invalid_cookie:
        return create_json_response({'error': 'invalid cookie', 'invalid fields': l_invalid_cookie}, 400)

    # Create Cookiejar
    cookiejar_uuid = crawlers.create_cookiejar(user_id, level=level, description=description)

    # Create Cookies
    if json_cookies:  # TODO CHECK Import
        res = crawlers.api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies)
        if res:
            return create_json_response(res[0], res[1])
    for cookie_dict in l_manual_cookie:
        crawlers.api_create_cookie(user_id, cookiejar_uuid, cookie_dict)

    return redirect(url_for('crawler_splash.crawler_cookiejar_show', uuid=cookiejar_uuid))

@crawler_splash.route('/crawler/cookiejar/all', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_all():
    user_id = current_user.get_id()
    user_cookiejars = crawlers.get_cookiejars_meta_by_iterator(crawlers.get_cookiejars_user(user_id))
    global_cookiejars = crawlers.get_cookiejars_meta_by_iterator(crawlers.get_cookiejars_global())
    return render_template("all_cookiejar.html", user_cookiejar=user_cookiejars, global_cookiejar=global_cookiejars)

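# Illustrative request (uuid value is a placeholder):
#   GET /crawler/cookiejar/show?uuid=<cookiejar_uuid>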
@crawler_splash.route('/crawler/cookiejar/show', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_show():
    user_id = current_user.get_id()
    cookiejar_uuid = request.args.get('uuid')

    res = crawlers.api_get_cookiejar(cookiejar_uuid, user_id)
    if res[1] != 200:
        return create_json_response(res[0], res[1])
    else:
        cookiejar_meta = res[0]

    return render_template("show_cookiejar.html", cookiejar_metadata=cookiejar_meta)

@crawler_splash.route('/crawler/cookie/delete', methods=['GET'])
@login_required
@login_analyst
def crawler_cookiejar_cookie_delete():
    user_id = current_user.get_id()
    cookie_uuid = request.args.get('uuid')

    res = crawlers.api_delete_cookie(user_id, cookie_uuid)
    if res[1] != 200:
        return create_json_response(res[0], res[1])
    else:
        cookiejar_uuid = res[0]['cookiejar_uuid']
        return redirect(url_for('crawler_splash.crawler_cookiejar_show', uuid=cookiejar_uuid))


@crawler_splash.route('/crawler/cookiejar/delete', methods=['GET'])
@login_required
@login_analyst
def crawler_cookiejar_delete():
    user_id = current_user.get_id()
    cookiejar_uuid = request.args.get('uuid')

    res = crawlers.api_delete_cookiejar(user_id, cookiejar_uuid)
    if res[1] != 200:
        return create_json_response(res[0], res[1])
    return redirect(url_for('crawler_splash.crawler_cookiejar_all'))

@crawler_splash.route('/crawler/cookiejar/edit', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_edit():
    user_id = current_user.get_id()
    cookiejar_uuid = request.args.get('uuid')
    description = request.args.get('description')

    res = crawlers.api_edit_cookiejar_description(user_id, cookiejar_uuid, description)
    return create_json_response(res[0], res[1])


@crawler_splash.route('/crawler/cookie/edit', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_edit():
    user_id = current_user.get_id()
    cookie_uuid = request.args.get('uuid')

    cookie_dict = crawlers.api_get_cookie(user_id, cookie_uuid)
    return render_template("edit_cookie.html", cookie_uuid=cookie_uuid, cookie_dict=cookie_dict)

@crawler_splash.route('/crawler/cookie/edit_post', methods=['POST'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_edit_post():
    user_id = current_user.get_id()
    cookie_uuid = request.form.get('cookie_uuid')
    name = request.form.get('name')
    value = request.form.get('value')
    domain = request.form.get('domain')
    path = request.form.get('path')
    httpOnly = request.form.get('httpOnly')
    secure = request.form.get('secure')

    cookie_dict = {'name': name, 'value': value}
    if domain:
        cookie_dict['domain'] = domain
    if path:
        cookie_dict['path'] = path
    if httpOnly:
        cookie_dict['httpOnly'] = True
    if secure:
        cookie_dict['secure'] = True

    res = crawlers.api_edit_cookie(user_id, cookie_uuid, cookie_dict)
    if res[1] != 200:
        return create_json_response(res[0], res[1])
    cookie = crawlers.Cookie(cookie_uuid)
    cookiejar_uuid = cookie.get_cookiejar()
    return redirect(url_for('crawler_splash.crawler_cookiejar_show', uuid=cookiejar_uuid))


@crawler_splash.route('/crawler/cookiejar/cookie/add', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_add():
    cookiejar_uuid = request.args.get('cookiejar_uuid')
    return render_template("add_cookie.html", cookiejar_uuid=cookiejar_uuid)

@crawler_splash.route('/crawler/cookiejar/cookie/manual_add_post', methods=['POST'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_manual_add_post():
    user_id = current_user.get_id()
    cookiejar_uuid = request.form.get('cookiejar_uuid')
    name = request.form.get('name')
    value = request.form.get('value')
    domain = request.form.get('domain')
    path = request.form.get('path')
    httpOnly = request.form.get('httpOnly')
    secure = request.form.get('secure')

    cookie_dict = {'name': name, 'value': value}
    if domain:
        cookie_dict['domain'] = domain
    if path:
        cookie_dict['path'] = path
    if httpOnly:
        cookie_dict['httpOnly'] = True
    if secure:
        cookie_dict['secure'] = True

    res = crawlers.api_create_cookie(user_id, cookiejar_uuid, cookie_dict)
    if res[1] != 200:
        return create_json_response(res[0], res[1])

    return redirect(url_for('crawler_splash.crawler_cookiejar_show', uuid=cookiejar_uuid))

@crawler_splash.route('/crawler/cookiejar/cookie/json_add_post', methods=['POST'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_json_add_post():
    user_id = current_user.get_id()
    cookiejar_uuid = request.form.get('cookiejar_uuid')

    if 'file' in request.files:
        file = request.files['file']
        json_cookies = file.read().decode()
        if json_cookies:
            res = crawlers.api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies)
            if res[1] != 200:
                return create_json_response(res[0], res[1])

            return redirect(url_for('crawler_splash.crawler_cookiejar_show', uuid=cookiejar_uuid))

    return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid))


# --- Cookiejar ---#

#### LACUS ####

@crawler_splash.route('/crawler/settings', methods=['GET'])
@login_required
@login_analyst
def crawler_settings():
    lacus_url = crawlers.get_lacus_url()
    api_key = crawlers.get_hidden_lacus_api_key()
    nb_captures = crawlers.get_crawler_max_captures()

    is_manager_connected = crawlers.get_lacus_connection_metadata(force_ping=True)
    is_crawler_working = crawlers.is_test_ail_crawlers_successful()
    crawler_error_mess = crawlers.get_test_ail_crawlers_message()

    # TODO REGISTER PROXY
    # all_proxies = crawlers.get_all_proxies_metadata()

    # crawler_full_config = Config_DB.get_full_config_by_section('crawler')

    return render_template("settings_crawler.html",
                           is_manager_connected=is_manager_connected,
                           lacus_url=lacus_url, api_key=api_key,
                           nb_captures=nb_captures,
                           # all_proxies=all_proxies,
                           is_crawler_working=is_crawler_working,
                           crawler_error_mess=crawler_error_mess,
                           )

@crawler_splash.route('/crawler/settings/crawler/manager', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_lacus_settings_crawler_manager():
    if request.method == 'POST':
        lacus_url = request.form.get('lacus_url')
        api_key = request.form.get('api_key')

        res = crawlers.api_save_lacus_url_key({'url': lacus_url, 'api_key': api_key})
        # print(res)
        if res[1] != 200:
            return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
        else:
            return redirect(url_for('crawler_splash.crawler_settings'))
    else:
        lacus_url = crawlers.get_lacus_url()
        api_key = crawlers.get_lacus_api_key()
        return render_template("settings_edit_lacus_crawler.html", lacus_url=lacus_url, api_key=api_key)


@crawler_splash.route('/crawler/settings/crawlers_to_launch', methods=['GET', 'POST'])
@login_required
@login_admin
def crawler_settings_crawlers_to_launch():
    if request.method == 'POST':
        nb_captures = request.form.get('nb_captures')
        res = crawlers.api_set_crawler_max_captures({'nb': nb_captures})
        if res[1] != 200:
            return create_json_response(res[0], res[1])
        else:
            return redirect(url_for('crawler_splash.crawler_settings'))
    else:
        nb_captures = crawlers.get_crawler_max_captures()
        return render_template("settings_edit_crawlers_to_launch.html",
                               nb_captures=nb_captures)

@crawler_splash.route('/crawler/settings/crawler/test', methods=['GET'])
@login_required
@login_admin
def crawler_settings_crawler_test():
    crawlers.test_ail_crawlers()
    return redirect(url_for('crawler_splash.crawler_settings'))


# --- LACUS ---#