# Source: AIL-framework/var/www/modules/hiddenServices/Flask_hiddenServices.py

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Flask functions and routes for the trending modules page
'''
import redis
import datetime
import sys
import os
import time
import json
from pyfaup.faup import Faup
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for
from Date import Date
from HiddenServices import HiddenServices
# ============ VARIABLES ============
import Flask_config
# Shared application object and database handles, all created once in
# Flask_config and reused by every blueprint of the web app.
app = Flask_config.app
cfg = Flask_config.cfg
baseUrl = Flask_config.baseUrl
r_cache = Flask_config.r_cache
r_serv_onion = Flask_config.r_serv_onion
r_serv_metadata = Flask_config.r_serv_metadata
bootstrap_label = Flask_config.bootstrap_label
hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')
# URL parser instance shared by the helper functions below
faup = Faup()
# crawler service types handled here, with their human-readable display names
list_types=['onion', 'regular']
dic_type_name={'onion':'Onion', 'regular':'Website'}
# ============ FUNCTIONS ============
def one():
    """Return the constant 1."""
    return 1
def get_date_range(num_day):
    """Return the last `num_day` dates as YYYYMMDD strings, oldest first."""
    today = datetime.date.today()
    anchor = Date(today.strftime('%Y%m%d'))
    # substract_day(0) is today itself, so the slice covers num_day days total
    dates = [anchor.substract_day(offset) for offset in range(num_day)]
    dates.reverse()
    return dates
def substract_date(date_from, date_to):
    """Expand an inclusive date range into a list of YYYYMMDD strings.

    :param date_from: start date, 'YYYYMMDD'
    :param date_to: end date, 'YYYYMMDD'
    :return: every day from date_from to date_to inclusive; empty list when
             date_to precedes date_from
    """
    start = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    end = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    span = (end - start).days
    return [(start + datetime.timedelta(days=offset)).strftime('%Y%m%d')
            for offset in range(span + 1)]
def unpack_paste_tags(p_tags):
    """Turn raw machine tags into (display_name, full_tag) pairs.

    A tag like 'ns:pred="value"' yields ('value', original); a tag
    '"name"=' (empty value) yields the quoted left side stripped of quotes;
    anything without '=' is returned unchanged as its own display name.
    """
    unpacked = []
    for complete_tag in p_tags:
        left, sep, right = complete_tag.partition('=')
        if sep:
            # strip the surrounding quote characters from whichever side is used
            display = right[1:-1] if right != '' else left[1:-1]
        else:
            # custom tag with no key=value structure
            display = left
        unpacked.append((display, complete_tag))
    return unpacked
def is_valid_domain(domain):
    """True when `domain` parses as a bare host: it has a TLD and carries no
    scheme, port or query string."""
    faup.decode(domain)
    parsed = faup.get()
    return (parsed['tld'] is not None
            and parsed['scheme'] is None
            and parsed['port'] is None
            and parsed['query_string'] is None)
def get_onion_status(domain, date):
    """True if `domain` is in the 'onion_up' set for `date` (YYYYMMDD)."""
    return bool(r_serv_onion.sismember('onion_up:' + date, domain))
def get_domain_type(domain):
    """Classify a 'host:type'-style string: 'onion' when the part after the
    last ':' is exactly 'onion', otherwise 'regular'."""
    return 'onion' if domain.split(':')[-1] == 'onion' else 'regular'
def get_type_domain(domain):
    """Return the crawler service type for a domain string.

    :param domain: hostname string, or None
    :return: 'onion' when the last dot-separated label is 'onion',
             'regular' otherwise (including None)

    BUGFIX: the original indexed rsplit('.', 1)[1] unconditionally, which
    raised IndexError for a domain without any '.' (e.g. 'localhost');
    such domains are now treated as 'regular'.
    """
    if domain is None:
        return 'regular'
    parts = domain.rsplit('.', 1)
    if len(parts) == 2 and parts[1] == 'onion':
        return 'onion'
    return 'regular'
def get_last_domains_crawled(type):
    """Full contents of the 'last_<type>' Redis list of recent crawls."""
    return r_serv_onion.lrange('last_{}'.format(type), 0, -1)
def get_stats_last_crawled_domains(type, date):
    """Return up/down/total/queued counters for one crawler type on one day."""
    nb_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
    nb_down = r_serv_onion.scard('{}_down:{}'.format(type, date))
    # both the normal and the priority queue count as pending work
    queued = (r_serv_onion.scard('{}_crawler_queue'.format(type))
              + r_serv_onion.scard('{}_crawler_priority_queue'.format(type)))
    return {'domains_up': nb_up,
            'domains_down': nb_down,
            'total': nb_up + nb_down,
            'domains_queue': queued}
def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None):
    """Build display metadata for crawled 'domain[:port];epoch' entries.

    :param list_domains_crawled: iterable of 'domain[:port];epoch' strings,
                                 as stored in the 'last_<type>' Redis lists
    :param date: YYYYMMDD string (kept for interface compatibility; unused)
    :param type: service type ('onion'/'regular'); when None it is derived
                 per entry from the domain string
    :return: list of dicts with domain, domain_name, port, epoch,
             first_seen, last_check and status_* keys

    BUGFIX: the original wrote the derived type back into the `type`
    parameter, so when called with type=None every entry after the first
    reused the first entry's type.
    """
    list_crawled_metadata = []
    for domain_epoch in list_domains_crawled:
        domain, epoch = domain_epoch.rsplit(';', 1)
        domain_port = domain.split(':')
        domain = domain_port[0]
        # default HTTP port when the entry carries no explicit port
        port = domain_port[1] if len(domain_port) > 1 else 80

        # derive the type per entry; never overwrite the parameter
        entry_type = type if type is not None else get_domain_type(domain)

        metadata_domain = {'domain': domain}
        # truncate very long (onion) names for display
        if len(domain) > 45:
            domain_name, tld_domain = domain.rsplit('.', 1)
            metadata_domain['domain_name'] = '{}[...].{}'.format(domain_name[:40], tld_domain)
        else:
            metadata_domain['domain_name'] = domain
        metadata_domain['port'] = port
        metadata_domain['epoch'] = epoch

        metadata_key = '{}_metadata:{}'.format(entry_type, domain)
        metadata_domain['last_check'] = r_serv_onion.hget(metadata_key, 'last_check')
        if metadata_domain['last_check'] is None:
            metadata_domain['last_check'] = '********'
        metadata_domain['first_seen'] = r_serv_onion.hget(metadata_key, 'first_seen')
        if metadata_domain['first_seen'] is None:
            metadata_domain['first_seen'] = '********'

        # a domain counts as UP when it is in the up-set of its last_check day
        if r_serv_onion.sismember('{}_up:{}'.format(entry_type, metadata_domain['last_check']), domain):
            metadata_domain['status_text'] = 'UP'
            metadata_domain['status_color'] = 'Green'
            metadata_domain['status_icon'] = 'fa-check-circle'
        else:
            metadata_domain['status_text'] = 'DOWN'
            metadata_domain['status_color'] = 'Red'
            metadata_domain['status_icon'] = 'fa-times-circle'
        list_crawled_metadata.append(metadata_domain)
    return list_crawled_metadata
def get_crawler_splash_status(type):
    """Return one status dict per registered splash crawler of `type`."""
    crawler_metadata = []
    for crawler in r_cache.smembers('{}_crawlers'.format(type)):
        meta_key = 'metadata_crawler:{}'.format(crawler)
        crawling_domain = r_cache.hget(meta_key, 'crawling_domain')
        started_time = r_cache.hget(meta_key, 'started_time')
        status_info = r_cache.hget(meta_key, 'status')
        crawler_metadata.append({
            'crawler_info': '{} - {}'.format(crawler, started_time),
            'crawling_domain': crawling_domain,
            'status_info': status_info,
            # a crawler is considered alive while it is waiting or crawling
            'status': status_info in ('Waiting', 'Crawling'),
        })
    return crawler_metadata
def create_crawler_config(mode, service_type, crawler_config, domain):
    """Persist a crawler configuration.

    Manual configs are transient (cache DB); auto configs are durable
    (onion DB). Any other mode is silently ignored, as before.
    """
    key = 'crawler_config:{}:{}:{}'.format(mode, service_type, domain)
    payload = json.dumps(crawler_config)
    if mode == 'manual':
        r_cache.set(key, payload)
    elif mode == 'auto':
        r_serv_onion.set(key, payload)
def send_url_to_crawl_in_queue(mode, service_type, url):
    """Push `url` onto the priority crawl queue for `service_type`.

    Auto-mode URLs are additionally recorded so the UI can list them.
    """
    queue_key = '{}_crawler_priority_queue'.format(service_type)
    r_serv_onion.sadd(queue_key, '{};{}'.format(url, mode))
    if mode == 'auto':
        r_serv_onion.sadd('auto_crawler_url:{}'.format(service_type), url)
# ============= ROUTES ==============
@hiddenServices.route("/crawlers/", methods=['GET'])
def dashboard():
return render_template("Crawler_dashboard.html")
@hiddenServices.route("/hiddenServices/2", methods=['GET'])
def hiddenServices_page_test():
return render_template("Crawler_index.html")
@hiddenServices.route("/crawlers/manual", methods=['GET'])
def manual():
return render_template("Crawler_Splash_manual.html")
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
def crawler_splash_onion():
type = 'onion'
last_onions = get_last_domains_crawled(type)
list_onion = []
now = datetime.datetime.now()
date = now.strftime("%Y%m%d")
statDomains = get_stats_last_crawled_domains(type, date)
list_onion = get_last_crawled_domains_metadata(last_onions, date, type=type)
crawler_metadata = get_crawler_splash_status(type)
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/crawlers/Crawler_Splash_last_by_type", methods=['GET'])
def Crawler_Splash_last_by_type():
type = request.args.get('type')
# verify user input
if type not in list_types:
type = 'onion'
type_name = dic_type_name[type]
list_domains = []
now = datetime.datetime.now()
date = now.strftime("%Y%m%d")
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
statDomains = get_stats_last_crawled_domains(type, date)
list_domains = get_last_crawled_domains_metadata(get_last_domains_crawled(type), date, type=type)
crawler_metadata = get_crawler_splash_status(type)
return render_template("Crawler_Splash_last_by_type.html", type=type, type_name=type_name,
last_domains=list_domains, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
@hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET'])
def blacklisted_domains():
blacklist_domain = request.args.get('blacklist_domain')
unblacklist_domain = request.args.get('unblacklist_domain')
type = request.args.get('type')
if type in list_types:
type_name = dic_type_name[type]
if blacklist_domain is not None:
blacklist_domain = int(blacklist_domain)
if unblacklist_domain is not None:
unblacklist_domain = int(unblacklist_domain)
try:
page = int(request.args.get('page'))
except:
page = 1
if page <= 0:
page = 1
nb_page_max = r_serv_onion.scard('blacklist_{}'.format(type))/(1000)
if isinstance(nb_page_max, float):
nb_page_max = int(nb_page_max)+1
if page > nb_page_max:
page = nb_page_max
start = 1000*(page -1)
stop = 1000*page
list_blacklisted = list(r_serv_onion.smembers('blacklist_{}'.format(type)))
list_blacklisted_1 = list_blacklisted[start:stop]
list_blacklisted_2 = list_blacklisted[stop:stop+1000]
return render_template("blacklisted_domains.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
type=type, type_name=type_name, page=page, nb_page_max=nb_page_max,
blacklist_domain=blacklist_domain, unblacklist_domain=unblacklist_domain)
else:
return 'Incorrect Type'
@hiddenServices.route("/crawler/blacklist_domain", methods=['GET'])
def blacklist_domain():
domain = request.args.get('domain')
type = request.args.get('type')
try:
page = int(request.args.get('page'))
except:
page = 1
if type in list_types:
if is_valid_domain(domain):
res = r_serv_onion.sadd('blacklist_{}'.format(type), domain)
if page:
if res == 0:
return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=2))
else:
return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=1))
else:
return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=0))
else:
return 'Incorrect type'
@hiddenServices.route("/crawler/unblacklist_domain", methods=['GET'])
def unblacklist_domain():
domain = request.args.get('domain')
type = request.args.get('type')
try:
page = int(request.args.get('page'))
except:
page = 1
if type in list_types:
if is_valid_domain(domain):
res = r_serv_onion.srem('blacklist_{}'.format(type), domain)
if page:
if res == 0:
return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=2))
else:
return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=1))
else:
return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=0))
else:
return 'Incorrect type'
@hiddenServices.route("/crawlers/create_spider_splash", methods=['POST'])
def create_spider_splash():
url = request.form.get('url_to_crawl')
automatic = request.form.get('crawler_type')
crawler_time = request.form.get('crawler_epoch')
#html = request.form.get('html_content_id')
screenshot = request.form.get('screenshot')
har = request.form.get('har')
depth_limit = request.form.get('depth_limit')
max_pages = request.form.get('max_pages')
# validate url
if url is None or url=='' or url=='\n':
return 'incorrect url'
crawler_config = {}
# verify user input
if automatic:
automatic = True
else:
automatic = False
if not screenshot:
crawler_config['png'] = 0
if not har:
crawler_config['har'] = 0
# verify user input
if depth_limit:
try:
depth_limit = int(depth_limit)
if depth_limit < 0:
return 'incorrect depth_limit'
else:
crawler_config['depth_limit'] = depth_limit
except:
return 'incorrect depth_limit'
if max_pages:
try:
max_pages = int(max_pages)
if max_pages < 1:
return 'incorrect max_pages'
else:
crawler_config['closespider_pagecount'] = max_pages
except:
return 'incorrect max_pages'
# get service_type
faup.decode(url)
unpack_url = faup.get()
domain = unpack_url['domain'].decode()
if unpack_url['tld'] == b'onion':
service_type = 'onion'
else:
service_type = 'regular'
if automatic:
mode = 'auto'
try:
crawler_time = int(crawler_time)
if crawler_time < 0:
return 'incorrect epoch'
else:
crawler_config['time'] = crawler_time
except:
return 'incorrect epoch'
else:
mode = 'manual'
epoch = None
create_crawler_config(mode, service_type, crawler_config, domain)
send_url_to_crawl_in_queue(mode, service_type, url)
return redirect(url_for('hiddenServices.manual'))
# # TODO: refractor
@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
def last_crawled_domains_with_stats_json():
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
list_onion = []
now = datetime.datetime.now()
date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
statDomains = {}
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
for onion in last_onions:
metadata_onion = {}
metadata_onion['domain'] = onion
metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
if metadata_onion['last_check'] is None:
metadata_onion['last_check'] = '********'
metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
if metadata_onion['first_seen'] is None:
metadata_onion['first_seen'] = '********'
if get_onion_status(onion, metadata_onion['last_check']):
metadata_onion['status_text'] = 'UP'
metadata_onion['status_color'] = 'Green'
metadata_onion['status_icon'] = 'fa-check-circle'
else:
metadata_onion['status_text'] = 'DOWN'
metadata_onion['status_color'] = 'Red'
metadata_onion['status_icon'] = 'fa-times-circle'
list_onion.append(metadata_onion)
crawler_metadata=[]
all_onion_crawler = r_cache.smembers('all_crawler:onion')
for crawler in all_onion_crawler:
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
crawler_info = '{} - {}'.format(crawler, started_time)
if status_info=='Waiting' or status_info=='Crawling':
status=True
else:
status=False
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata})
@hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
def get_onions_by_daterange():
date_from = request.form.get('date_from')
date_to = request.form.get('date_to')
domains_up = request.form.get('domains_up')
domains_down = request.form.get('domains_down')
domains_tags = request.form.get('domains_tags')
return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags))
@hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET'])
def show_domains_by_daterange():
date_from = request.args.get('date_from')
date_to = request.args.get('date_to')
domains_up = request.args.get('domains_up')
domains_down = request.args.get('domains_down')
domains_tags = request.args.get('domains_tags')
date_range = []
if date_from is not None and date_to is not None:
#change format
try:
if len(date_from) != 8:
date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
date_range = substract_date(date_from, date_to)
except:
pass
if not date_range:
date_range.append(datetime.date.today().strftime("%Y%m%d"))
date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8]
date_to = date_from
else:
date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8]
date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8]
domains_by_day = {}
domain_metadata = {}
for date in date_range:
if domains_up:
domains_up = True
domains_by_day[date] = list(r_serv_onion.smembers('onion_up:{}'.format(date)))
for domain in domains_by_day[date]:
h = HiddenServices(domain, 'onion')
domain_metadata[domain] = {}
if domains_tags:
domains_tags = True
domain_metadata[domain]['tags'] = h.get_domain_tags(update=True)
domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check')
if domain_metadata[domain]['last_check'] is None:
domain_metadata[domain]['last_check'] = '********'
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen')
if domain_metadata[domain]['first_seen'] is None:
domain_metadata[domain]['first_seen'] = '********'
domain_metadata[domain]['status_text'] = 'UP'
domain_metadata[domain]['status_color'] = 'Green'
domain_metadata[domain]['status_icon'] = 'fa-check-circle'
if domains_down:
domains_down = True
domains_by_day_down = list(r_serv_onion.smembers('onion_down:{}'.format(date)))
if domains_up:
domains_by_day[date].extend(domains_by_day_down)
else:
domains_by_day[date] = domains_by_day_down
for domain in domains_by_day_down:
#h = HiddenServices(onion_domain, 'onion')
domain_metadata[domain] = {}
#domain_metadata[domain]['tags'] = h.get_domain_tags()
domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check')
if domain_metadata[domain]['last_check'] is None:
domain_metadata[domain]['last_check'] = '********'
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen')
if domain_metadata[domain]['first_seen'] is None:
domain_metadata[domain]['first_seen'] = '********'
domain_metadata[domain]['status_text'] = 'DOWN'
domain_metadata[domain]['status_color'] = 'Red'
domain_metadata[domain]['status_icon'] = 'fa-times-circle'
return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day, domain_metadata=domain_metadata,
date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
domains_tags=domains_tags, bootstrap_label=bootstrap_label)
@hiddenServices.route("/crawlers/show_domain", methods=['GET'])
def show_domain():
domain = request.args.get('domain')
epoch = request.args.get('epoch')
try:
epoch = int(epoch)
except:
epoch = None
port = request.args.get('port')
try:
port = int(port)
except:
port = 80
type = get_type_domain(domain)
if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(type, domain)):
return '404'
# # TODO: FIXME return 404
last_check = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'last_check')
if last_check is None:
last_check = '********'
last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
first_seen = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'first_seen')
if first_seen is None:
first_seen = '********'
first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
origin_paste = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'paste_parent')
h = HiddenServices(domain, type, port=port)
item_core = h.get_domain_crawled_core_item(epoch=epoch)
if item_core:
l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
else:
l_pastes = []
dict_links = h.get_all_links(l_pastes)
if l_pastes:
status = True
else:
status = False
last_check = '{} - {}'.format(last_check, time.strftime('%H:%M.%S', time.gmtime(epoch)))
screenshot = h.get_domain_random_screenshot(l_pastes)
if screenshot:
screenshot = screenshot[0]
else:
screenshot = 'None'
domain_tags = h.get_domain_tags()
origin_paste_name = h.get_origin_paste_name()
origin_paste_tags = unpack_paste_tags(r_serv_metadata.smembers('tag:{}'.format(origin_paste)))
paste_tags = []
for path in l_pastes:
p_tags = r_serv_metadata.smembers('tag:'+path)
paste_tags.append(unpack_paste_tags(p_tags))
return render_template("showDomain.html", domain=domain, last_check=last_check, first_seen=first_seen,
l_pastes=l_pastes, paste_tags=paste_tags, bootstrap_label=bootstrap_label,
dict_links=dict_links,
origin_paste_tags=origin_paste_tags, status=status,
origin_paste=origin_paste, origin_paste_name=origin_paste_name,
domain_tags=domain_tags, screenshot=screenshot)
@hiddenServices.route("/hiddenServices/onion_son", methods=['GET'])
def onion_son():
onion_domain = request.args.get('onion_domain')
h = HiddenServices(onion_domain, 'onion')
l_pastes = h.get_last_crawled_pastes()
l_son = h.get_domain_son(l_pastes)
return 'l_son'
# ============= JSON ==============
@hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET'])
def domain_crawled_7days_json():
type = 'onion'
## TODO: # FIXME: 404 error
date_range = get_date_range(7)
json_domain_stats = []
#try:
for date in date_range:
nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
nb_domain_down = r_serv_onion.scard('{}_up:{}'.format(type, date))
date = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
json_domain_stats.append({ 'date': date, 'value': int( nb_domain_up ), 'nb_domain_down': int( nb_domain_down )})
#except:
#return jsonify()
return jsonify(json_domain_stats)
@hiddenServices.route('/hiddenServices/domain_crawled_by_type_json')
def domain_crawled_by_type_json():
    """JSON: last-7-days UP/DOWN domain counts for one service type.

    BUGFIX: 'DOWN' previously read the '<type>_up:' key (copy-paste), so up
    and down counts were always identical. Also removed the useless chained
    `nb_domain_up =` assignments and the unused 'date' query arg read.
    """
    type = request.args.get('type')
    if type not in list_types:
        return jsonify('Incorrect Type')
    range_decoder = []
    for date in get_date_range(7):
        range_decoder.append({
            'date': '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8]),
            'UP': r_serv_onion.scard('{}_up:{}'.format(type, date)),
            'DOWN': r_serv_onion.scard('{}_down:{}'.format(type, date)),
        })
    return jsonify(range_decoder)
# ========= REGISTRATION =========
# expose all routes defined above under the application's base URL prefix
app.register_blueprint(hiddenServices, url_prefix=baseUrl)