#!/usr/bin/env python3 # -*-coding:UTF-8 -* ''' Flask functions and routes for the trending modules page ''' import redis import datetime import sys import os import time import json from pyfaup.faup import Faup from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for from Date import Date from HiddenServices import HiddenServices # ============ VARIABLES ============ import Flask_config app = Flask_config.app cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl r_cache = Flask_config.r_cache r_serv_onion = Flask_config.r_serv_onion r_serv_metadata = Flask_config.r_serv_metadata bootstrap_label = Flask_config.bootstrap_label hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates') faup = Faup() list_types=['onion', 'regular'] dic_type_name={'onion':'Onion', 'regular':'Website'} # ============ FUNCTIONS ============ def one(): return 1 def get_date_range(num_day): curr_date = datetime.date.today() date = Date( '{}{}{}'.format(str(curr_date.year), str(curr_date.month).zfill(2), str(curr_date.day).zfill(2)) ) date_list = [] for i in range(0, num_day): date_list.append(date.substract_day(i)) return list(reversed(date_list)) def substract_date(date_from, date_to): date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) delta = date_to - date_from # timedelta l_date = [] for i in range(delta.days + 1): date = date_from + datetime.timedelta(i) l_date.append( date.strftime('%Y%m%d') ) return l_date def unpack_paste_tags(p_tags): l_tags = [] for tag in p_tags: complete_tag = tag tag = tag.split('=') if len(tag) > 1: if tag[1] != '': tag = tag[1][1:-1] # no value else: tag = tag[0][1:-1] # use for custom tags else: tag = tag[0] l_tags.append( (tag, complete_tag) ) return l_tags def is_valid_domain(domain): faup.decode(domain) domain_unpack = faup.get() if domain_unpack['tld'] is not None and domain_unpack['scheme'] is None and domain_unpack['port'] is None and domain_unpack['query_string'] is None: return True else: return False def get_onion_status(domain, date): if r_serv_onion.sismember('onion_up:'+date , domain): return True else: return False def get_domain_type(domain): type_id = domain.split(':')[-1] if type_id == 'onion': return 'onion' else: return 'regular' def get_type_domain(domain): if domain is None: type = 'regular' else: if domain.rsplit('.', 1)[1] == 'onion': type = 'onion' else: type = 'regular' return type def get_last_domains_crawled(type): return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1) def get_stats_last_crawled_domains(type, date): statDomains = {} statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date)) statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date)) statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] statDomains['domains_queue'] = r_serv_onion.scard('{}_crawler_queue'.format(type)) statDomains['domains_queue'] += r_serv_onion.scard('{}_crawler_priority_queue'.format(type)) return statDomains def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None): list_crawled_metadata = [] for domain_epoch in list_domains_crawled: domain, epoch = domain_epoch.rsplit(';', 1) domain = domain.split(':') if len(domain) == 1: port = 80 domain = domain[0] else: port = domain[1] domain = domain[0] metadata_domain = {} # get Domain type if type is None: type = get_domain_type(domain) metadata_domain['domain'] = domain if len(domain) > 45: domain_name, tld_domain = domain.rsplit('.', 1) metadata_domain['domain_name'] = '{}[...].{}'.format(domain_name[:40], tld_domain) else: metadata_domain['domain_name'] = domain metadata_domain['port'] = port metadata_domain['epoch'] = epoch metadata_domain['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'last_check') if metadata_domain['last_check'] is None: metadata_domain['last_check'] = '********' metadata_domain['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'first_seen') if metadata_domain['first_seen'] is None: metadata_domain['first_seen'] = '********' if r_serv_onion.sismember('{}_up:{}'.format(type, metadata_domain['last_check']) , domain): metadata_domain['status_text'] = 'UP' metadata_domain['status_color'] = 'Green' metadata_domain['status_icon'] = 'fa-check-circle' else: metadata_domain['status_text'] = 'DOWN' metadata_domain['status_color'] = 'Red' metadata_domain['status_icon'] = 'fa-times-circle' list_crawled_metadata.append(metadata_domain) return list_crawled_metadata def get_crawler_splash_status(type): crawler_metadata = [] all_crawlers = r_cache.smembers('{}_crawlers'.format(type)) for crawler in all_crawlers: crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain') started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time') status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status') crawler_info = '{} - {}'.format(crawler, started_time) if status_info=='Waiting' or status_info=='Crawling': status=True else: status=False crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status}) return crawler_metadata def create_crawler_config(mode, service_type, crawler_config, domain): if mode == 'manual': r_cache.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config)) elif mode == 'auto': r_serv_onion.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config)) def send_url_to_crawl_in_queue(mode, service_type, url): r_serv_onion.sadd('{}_crawler_priority_queue'.format(service_type), '{};{}'.format(url, mode)) # add auto crawled url for user UI if mode == 'auto': r_serv_onion.sadd('auto_crawler_url:{}'.format(service_type), url) # ============= ROUTES ============== @hiddenServices.route("/crawlers/", methods=['GET']) def dashboard(): return render_template("Crawler_dashboard.html") @hiddenServices.route("/hiddenServices/2", methods=['GET']) def hiddenServices_page_test(): return render_template("Crawler_index.html") @hiddenServices.route("/crawlers/manual", methods=['GET']) def manual(): return render_template("Crawler_Splash_manual.html") @hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET']) def crawler_splash_onion(): type = 'onion' last_onions = get_last_domains_crawled(type) list_onion = [] now = datetime.datetime.now() date = now.strftime("%Y%m%d") statDomains = get_stats_last_crawled_domains(type, date) list_onion = get_last_crawled_domains_metadata(last_onions, date, type=type) crawler_metadata = get_crawler_splash_status(type) date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8]) return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains, crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string) @hiddenServices.route("/crawlers/Crawler_Splash_last_by_type", methods=['GET']) def Crawler_Splash_last_by_type(): type = request.args.get('type') # verify user input if type not in list_types: type = 'onion' type_name = dic_type_name[type] list_domains = [] now = datetime.datetime.now() date = now.strftime("%Y%m%d") date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8]) statDomains = get_stats_last_crawled_domains(type, date) list_domains = get_last_crawled_domains_metadata(get_last_domains_crawled(type), date, type=type) crawler_metadata = get_crawler_splash_status(type) return render_template("Crawler_Splash_last_by_type.html", type=type, type_name=type_name, last_domains=list_domains, statDomains=statDomains, crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string) @hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET']) def blacklisted_domains(): blacklist_domain = request.args.get('blacklist_domain') unblacklist_domain = request.args.get('unblacklist_domain') type = request.args.get('type') if type in list_types: type_name = dic_type_name[type] if blacklist_domain is not None: blacklist_domain = int(blacklist_domain) if unblacklist_domain is not None: unblacklist_domain = int(unblacklist_domain) try: page = int(request.args.get('page')) except: page = 1 if page <= 0: page = 1 nb_page_max = r_serv_onion.scard('blacklist_{}'.format(type))/(1000) if isinstance(nb_page_max, float): nb_page_max = int(nb_page_max)+1 if page > nb_page_max: page = nb_page_max start = 1000*(page -1) stop = 1000*page list_blacklisted = list(r_serv_onion.smembers('blacklist_{}'.format(type))) list_blacklisted_1 = list_blacklisted[start:stop] list_blacklisted_2 = list_blacklisted[stop:stop+1000] return render_template("blacklisted_domains.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2, type=type, type_name=type_name, page=page, nb_page_max=nb_page_max, blacklist_domain=blacklist_domain, unblacklist_domain=unblacklist_domain) else: return 'Incorrect Type' @hiddenServices.route("/crawler/blacklist_domain", methods=['GET']) def blacklist_domain(): domain = request.args.get('domain') type = request.args.get('type') try: page = int(request.args.get('page')) except: page = 1 if type in list_types: if is_valid_domain(domain): res = r_serv_onion.sadd('blacklist_{}'.format(type), domain) if page: if res == 0: return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=2)) else: return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=1)) else: return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=0)) else: return 'Incorrect type' @hiddenServices.route("/crawler/unblacklist_domain", methods=['GET']) def unblacklist_domain(): domain = request.args.get('domain') type = request.args.get('type') try: page = int(request.args.get('page')) except: page = 1 if type in list_types: if is_valid_domain(domain): res = r_serv_onion.srem('blacklist_{}'.format(type), domain) if page: if res == 0: return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=2)) else: return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=1)) else: return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=0)) else: return 'Incorrect type' @hiddenServices.route("/crawlers/create_spider_splash", methods=['POST']) def create_spider_splash(): url = request.form.get('url_to_crawl') automatic = request.form.get('crawler_type') crawler_time = request.form.get('crawler_epoch') #html = request.form.get('html_content_id') screenshot = request.form.get('screenshot') har = request.form.get('har') depth_limit = request.form.get('depth_limit') max_pages = request.form.get('max_pages') # validate url if url is None or url=='' or url=='\n': return 'incorrect url' crawler_config = {} # verify user input if automatic: automatic = True else: automatic = False if not screenshot: crawler_config['png'] = 0 if not har: crawler_config['har'] = 0 # verify user input if depth_limit: try: depth_limit = int(depth_limit) if depth_limit < 0: return 'incorrect depth_limit' else: crawler_config['depth_limit'] = depth_limit except: return 'incorrect depth_limit' if max_pages: try: max_pages = int(max_pages) if max_pages < 1: return 'incorrect max_pages' else: crawler_config['closespider_pagecount'] = max_pages except: return 'incorrect max_pages' # get service_type faup.decode(url) unpack_url = faup.get() domain = unpack_url['domain'].decode() if unpack_url['tld'] == b'onion': service_type = 'onion' else: service_type = 'regular' if automatic: mode = 'auto' try: crawler_time = int(crawler_time) if crawler_time < 0: return 'incorrect epoch' else: crawler_config['time'] = crawler_time except: return 'incorrect epoch' else: mode = 'manual' epoch = None create_crawler_config(mode, service_type, crawler_config, domain) send_url_to_crawl_in_queue(mode, service_type, url) return redirect(url_for('hiddenServices.manual')) # # TODO: refractor @hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET']) def last_crawled_domains_with_stats_json(): last_onions = r_serv_onion.lrange('last_onion', 0 ,-1) list_onion = [] now = datetime.datetime.now() date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d")) statDomains = {} statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date)) statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date)) statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down'] statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue') for onion in last_onions: metadata_onion = {} metadata_onion['domain'] = onion metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check') if metadata_onion['last_check'] is None: metadata_onion['last_check'] = '********' metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen') if metadata_onion['first_seen'] is None: metadata_onion['first_seen'] = '********' if get_onion_status(onion, metadata_onion['last_check']): metadata_onion['status_text'] = 'UP' metadata_onion['status_color'] = 'Green' metadata_onion['status_icon'] = 'fa-check-circle' else: metadata_onion['status_text'] = 'DOWN' metadata_onion['status_color'] = 'Red' metadata_onion['status_icon'] = 'fa-times-circle' list_onion.append(metadata_onion) crawler_metadata=[] all_onion_crawler = r_cache.smembers('all_crawler:onion') for crawler in all_onion_crawler: crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain') started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time') status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status') crawler_info = '{} - {}'.format(crawler, started_time) if status_info=='Waiting' or status_info=='Crawling': status=True else: status=False crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status}) date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8]) return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata}) @hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST']) def get_onions_by_daterange(): date_from = request.form.get('date_from') date_to = request.form.get('date_to') domains_up = request.form.get('domains_up') domains_down = request.form.get('domains_down') domains_tags = request.form.get('domains_tags') return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags)) @hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET']) def show_domains_by_daterange(): date_from = request.args.get('date_from') date_to = request.args.get('date_to') domains_up = request.args.get('domains_up') domains_down = request.args.get('domains_down') domains_tags = request.args.get('domains_tags') date_range = [] if date_from is not None and date_to is not None: #change format try: if len(date_from) != 8: date_from = date_from[0:4] + date_from[5:7] + date_from[8:10] date_to = date_to[0:4] + date_to[5:7] + date_to[8:10] date_range = substract_date(date_from, date_to) except: pass if not date_range: date_range.append(datetime.date.today().strftime("%Y%m%d")) date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8] date_to = date_from else: date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8] date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8] domains_by_day = {} domain_metadata = {} for date in date_range: if domains_up: domains_up = True domains_by_day[date] = list(r_serv_onion.smembers('onion_up:{}'.format(date))) for domain in domains_by_day[date]: h = HiddenServices(domain, 'onion') domain_metadata[domain] = {} if domains_tags: domains_tags = True domain_metadata[domain]['tags'] = h.get_domain_tags(update=True) domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check') if domain_metadata[domain]['last_check'] is None: domain_metadata[domain]['last_check'] = '********' domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen') if domain_metadata[domain]['first_seen'] is None: domain_metadata[domain]['first_seen'] = '********' domain_metadata[domain]['status_text'] = 'UP' domain_metadata[domain]['status_color'] = 'Green' domain_metadata[domain]['status_icon'] = 'fa-check-circle' if domains_down: domains_down = True domains_by_day_down = list(r_serv_onion.smembers('onion_down:{}'.format(date))) if domains_up: domains_by_day[date].extend(domains_by_day_down) else: domains_by_day[date] = domains_by_day_down for domain in domains_by_day_down: #h = HiddenServices(onion_domain, 'onion') domain_metadata[domain] = {} #domain_metadata[domain]['tags'] = h.get_domain_tags() domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check') if domain_metadata[domain]['last_check'] is None: domain_metadata[domain]['last_check'] = '********' domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen') if domain_metadata[domain]['first_seen'] is None: domain_metadata[domain]['first_seen'] = '********' domain_metadata[domain]['status_text'] = 'DOWN' domain_metadata[domain]['status_color'] = 'Red' domain_metadata[domain]['status_icon'] = 'fa-times-circle' return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day, domain_metadata=domain_metadata, date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags, bootstrap_label=bootstrap_label) @hiddenServices.route("/crawlers/show_domain", methods=['GET']) def show_domain(): domain = request.args.get('domain') epoch = request.args.get('epoch') try: epoch = int(epoch) except: epoch = None port = request.args.get('port') faup.decode(domain) unpack_url = faup.get() domain = unpack_url['domain'].decode() if not port: port = unpack_url['port'].decode() try: port = int(port) except: port = 80 type = get_type_domain(domain) if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(type, domain)): return '404' # # TODO: FIXME return 404 last_check = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'last_check') if last_check is None: last_check = '********' last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8]) first_seen = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'first_seen') if first_seen is None: first_seen = '********' first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8]) origin_paste = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'paste_parent') h = HiddenServices(domain, type, port=port) item_core = h.get_domain_crawled_core_item(epoch=epoch) if item_core: l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item']) else: l_pastes = [] dict_links = h.get_all_links(l_pastes) if l_pastes: status = True else: status = False last_check = '{} - {}'.format(last_check, time.strftime('%H:%M.%S', time.gmtime(epoch))) screenshot = h.get_domain_random_screenshot(l_pastes) if screenshot: screenshot = screenshot[0] else: screenshot = 'None' domain_tags = h.get_domain_tags() origin_paste_name = h.get_origin_paste_name() origin_paste_tags = unpack_paste_tags(r_serv_metadata.smembers('tag:{}'.format(origin_paste))) paste_tags = [] for path in l_pastes: p_tags = r_serv_metadata.smembers('tag:'+path) paste_tags.append(unpack_paste_tags(p_tags)) return render_template("showDomain.html", domain=domain, last_check=last_check, first_seen=first_seen, l_pastes=l_pastes, paste_tags=paste_tags, bootstrap_label=bootstrap_label, dict_links=dict_links, origin_paste_tags=origin_paste_tags, status=status, origin_paste=origin_paste, origin_paste_name=origin_paste_name, domain_tags=domain_tags, screenshot=screenshot) @hiddenServices.route("/hiddenServices/onion_son", methods=['GET']) def onion_son(): onion_domain = request.args.get('onion_domain') h = HiddenServices(onion_domain, 'onion') l_pastes = h.get_last_crawled_pastes() l_son = h.get_domain_son(l_pastes) return 'l_son' # ============= JSON ============== @hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET']) def domain_crawled_7days_json(): type = 'onion' ## TODO: # FIXME: 404 error date_range = get_date_range(7) json_domain_stats = [] #try: for date in date_range: nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date)) nb_domain_down = r_serv_onion.scard('{}_up:{}'.format(type, date)) date = date[0:4] + '-' + date[4:6] + '-' + date[6:8] json_domain_stats.append({ 'date': date, 'value': int( nb_domain_up ), 'nb_domain_down': int( nb_domain_down )}) #except: #return jsonify() return jsonify(json_domain_stats) @hiddenServices.route('/hiddenServices/domain_crawled_by_type_json') def domain_crawled_by_type_json(): current_date = request.args.get('date') type = request.args.get('type') if type in list_types: num_day_type = 7 date_range = get_date_range(num_day_type) range_decoder = [] for date in date_range: day_crawled = {} day_crawled['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8] day_crawled['UP']= nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date)) day_crawled['DOWN']= nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date)) range_decoder.append(day_crawled) return jsonify(range_decoder) else: return jsonify('Incorrect Type') # ========= REGISTRATION ========= app.register_blueprint(hiddenServices, url_prefix=baseUrl)