mirror of https://github.com/CIRCL/AIL-framework

#!/usr/bin/env python3
# -*-coding:UTF-8 -*

'''
    Flask functions and routes for the crawler and hidden services pages
'''
import redis
import datetime
import sys
import os
import time
import json
from pyfaup.faup import Faup
from flask import Flask, render_template, jsonify, request, send_file, Blueprint, redirect, url_for

from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
from flask_login import login_required

from Date import Date
from HiddenServices import HiddenServices

# ============ VARIABLES ============
import Flask_config

app = Flask_config.app
baseUrl = Flask_config.baseUrl
r_cache = Flask_config.r_cache
r_serv_onion = Flask_config.r_serv_onion
r_serv_metadata = Flask_config.r_serv_metadata
crawler_enabled = Flask_config.crawler_enabled
bootstrap_label = Flask_config.bootstrap_label

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import crawlers

hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')

faup = Faup()
list_types = ['onion', 'regular']
dic_type_name = {'onion': 'Onion', 'regular': 'Website'}

# ============ FUNCTIONS ============

def one():
    return 1

def get_date_range(num_day):
    curr_date = datetime.date.today()
    date = Date(curr_date.strftime("%Y%m%d"))
    date_list = []

    for i in range(num_day):
        date_list.append(date.substract_day(i))

    return list(reversed(date_list))
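
# Minimal usage sketch (illustration only, hypothetical dates): run on
# 2020-03-05, get_date_range(3) yields the last three days in ascending
# order, assuming Date.substract_day() steps back one day per call.
def _example_get_date_range():  # hypothetical helper, not used by any route
    return get_date_range(3)    # e.g. ['20200303', '20200304', '20200305']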

def substract_date(date_from, date_to):
    date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
    date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
    delta = date_to - date_from  # timedelta
    l_date = []
    for i in range(delta.days + 1):
        date = date_from + datetime.timedelta(i)
        l_date.append(date.strftime('%Y%m%d'))
    return l_date
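
# Usage sketch (illustration only): substract_date() expands an inclusive
# YYYYMMDD range into one entry per day.
def _example_substract_date():  # hypothetical helper, not used by any route
    # returns ['20200301', '20200302', '20200303']
    return substract_date('20200301', '20200303')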

def unpack_paste_tags(p_tags):
    l_tags = []
    for tag in p_tags:
        complete_tag = tag
        tag = tag.split('=')
        if len(tag) > 1:
            # quoted value: keep it without the surrounding quotes
            if tag[1] != '':
                tag = tag[1][1:-1]
            # no value
            else:
                tag = tag[0][1:-1]
        # used for custom tags
        else:
            tag = tag[0]
        l_tags.append((tag, complete_tag))
    return l_tags
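
# Usage sketch (illustration only): a taxonomy tag keeps its quoted value,
# a custom tag is returned unchanged; the full tag is kept alongside.
def _example_unpack_paste_tags():  # hypothetical helper, not used by any route
    # returns [('ransomware', 'misp-galaxy:tool="ransomware"'), ('my-tag', 'my-tag')]
    return unpack_paste_tags(['misp-galaxy:tool="ransomware"', 'my-tag'])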

def is_valid_domain(domain):
    faup.decode(domain)
    domain_unpack = faup.get()
    return (domain_unpack['tld'] is not None and domain_unpack['scheme'] is None
            and domain_unpack['port'] is None and domain_unpack['query_string'] is None)

def is_valid_service_type(service_type):
    accepted_service = ['onion', 'regular']
    return service_type in accepted_service
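
# Usage sketch (illustration only, assuming pyfaup parses these inputs as
# expected): a bare domain validates, anything carrying a scheme, port or
# query string does not.
def _example_validators():  # hypothetical helper, not used by any route
    assert is_valid_service_type('onion') is True
    assert is_valid_service_type('i2p') is False
    return is_valid_domain('example.com')  # expected True: no scheme/port/query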

def get_onion_status(domain, date):
    return r_serv_onion.sismember('onion_up:{}'.format(date), domain)

def get_domain_type(domain):
    type_id = domain.split(':')[-1]
    if type_id == 'onion':
        return 'onion'
    else:
        return 'regular'

def get_type_domain(domain):
    if domain is None:
        type = 'regular'
    else:
        # [-1] avoids an IndexError on domains without a dot
        if domain.rsplit('.', 1)[-1] == 'onion':
            type = 'onion'
        else:
            type = 'regular'
    return type

def get_domain_from_url(url):
    faup.decode(url)
    unpack_url = faup.get()
    domain = unpack_url['domain']
    ## TODO: FIXME remove me
    # pyfaup may return bytes instead of str
    if isinstance(domain, bytes):
        domain = domain.decode()
    return domain
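
# Usage sketch (illustration only, assuming pyfaup extracts the registered
# domain from a full URL): the scheme and path are stripped.
def _example_get_domain_from_url():  # hypothetical helper, not used by any route
    # expected 'example.onion'
    return get_domain_from_url('http://example.onion/page.html')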

def get_last_domains_crawled(type):
    return r_serv_onion.lrange('last_{}'.format(type), 0, -1)

def get_nb_domains_inqueue(type):
    nb = r_serv_onion.scard('{}_crawler_queue'.format(type))
    nb += r_serv_onion.scard('{}_crawler_priority_queue'.format(type))
    return nb

def get_stats_last_crawled_domains(type, date):
    statDomains = {}
    statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
    statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
    statDomains['domains_queue'] = get_nb_domains_inqueue(type)
    return statDomains
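
# Shape sketch (illustration only, hypothetical numbers) of the dict built
# by get_stats_last_crawled_domains('onion', '20200305'):
def _example_stat_domains():  # hypothetical helper, not used by any route
    return {'domains_up': 12, 'domains_down': 3, 'total': 15, 'domains_queue': 42}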

def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, auto_mode=False):
    list_crawled_metadata = []
    for domain_epoch in list_domains_crawled:
        # regular entries are formatted 'domain[:port];epoch',
        # auto_mode entries are plain urls
        if not auto_mode:
            domain, epoch = domain_epoch.rsplit(';', 1)
        else:
            url = domain_epoch
            domain = domain_epoch
        domain = domain.split(':')
        if len(domain) == 1:
            port = 80
            domain = domain[0]
        else:
            port = domain[1]
            domain = domain[0]
        metadata_domain = {}
        # get domain type
        if type is None:
            type_domain = get_type_domain(domain)
        else:
            type_domain = type
        if auto_mode:
            metadata_domain['url'] = url
            epoch = r_serv_onion.zscore('crawler_auto_queue', '{};auto;{}'.format(domain, type_domain))
            # domain in priority queue
            if epoch is None:
                epoch = 'In Queue'
            else:
                epoch = datetime.datetime.fromtimestamp(float(epoch)).strftime('%Y-%m-%d %H:%M:%S')

        metadata_domain['domain'] = domain
        if len(domain) > 45:
            domain_name, tld_domain = domain.rsplit('.', 1)
            metadata_domain['domain_name'] = '{}[...].{}'.format(domain_name[:40], tld_domain)
        else:
            metadata_domain['domain_name'] = domain
        metadata_domain['port'] = port
        metadata_domain['epoch'] = epoch
        metadata_domain['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(type_domain, domain), 'last_check')
        if metadata_domain['last_check'] is None:
            metadata_domain['last_check'] = '********'
        metadata_domain['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(type_domain, domain), 'first_seen')
        if metadata_domain['first_seen'] is None:
            metadata_domain['first_seen'] = '********'
        if r_serv_onion.sismember('{}_up:{}'.format(type_domain, metadata_domain['last_check']), domain):
            metadata_domain['status_text'] = 'UP'
            metadata_domain['status_color'] = 'Green'
            metadata_domain['status_icon'] = 'fa-check-circle'
        else:
            metadata_domain['status_text'] = 'DOWN'
            metadata_domain['status_color'] = 'Red'
            metadata_domain['status_icon'] = 'fa-times-circle'
        list_crawled_metadata.append(metadata_domain)
    return list_crawled_metadata
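
# Shape sketch (illustration only, hypothetical values): one entry such as
# 'example.onion:8080;1583404800' produces a metadata dict roughly like this:
def _example_crawled_domain_metadata():  # hypothetical helper, not used by any route
    return {'domain': 'example.onion', 'domain_name': 'example.onion',
            'port': '8080', 'epoch': '1583404800', 'last_check': '20200305',
            'first_seen': '20200101', 'status_text': 'UP',
            'status_color': 'Green', 'status_icon': 'fa-check-circle'}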

def get_crawler_splash_status(type):
    crawler_metadata = []
    all_crawlers = r_cache.smembers('{}_crawlers'.format(type))
    for crawler in all_crawlers:
        crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
        started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
        status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
        crawler_info = '{}  - {}'.format(crawler, started_time)
        status = status_info == 'Waiting' or status_info == 'Crawling'
        crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})

    return crawler_metadata

def delete_auto_crawler(url):
    domain = get_domain_from_url(url)
    type = get_type_domain(domain)
    # remove from set
    r_serv_onion.srem('auto_crawler_url:{}'.format(type), url)
    # remove config
    r_serv_onion.delete('crawler_config:auto:{}:{}:{}'.format(type, domain, url))
    # remove from queue
    r_serv_onion.srem('{}_crawler_priority_queue'.format(type), '{};auto'.format(url))
    # remove from crawler_auto_queue
    r_serv_onion.zrem('crawler_auto_queue', '{};auto;{}'.format(url, type))

# ============= ROUTES ==============

@hiddenServices.route("/crawlers/", methods=['GET'])
@login_required
@login_read_only
def dashboard():
    crawler_metadata_onion = get_crawler_splash_status('onion')
    crawler_metadata_regular = get_crawler_splash_status('regular')

    now = datetime.datetime.now()
    date = now.strftime("%Y%m%d")
    statDomains_onion = get_stats_last_crawled_domains('onion', date)
    statDomains_regular = get_stats_last_crawled_domains('regular', date)

    return render_template("Crawler_dashboard.html", crawler_metadata_onion=crawler_metadata_onion,
                                crawler_enabled=crawler_enabled, date=date,
                                crawler_metadata_regular=crawler_metadata_regular,
                                statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)
| @hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| def crawler_splash_onion():
 | |
|     type = 'onion'
 | |
|     last_onions = get_last_domains_crawled(type)
 | |
|     list_onion = []
 | |
| 
 | |
|     now = datetime.datetime.now()
 | |
|     date = now.strftime("%Y%m%d")
 | |
|     statDomains = get_stats_last_crawled_domains(type, date)
 | |
| 
 | |
|     list_onion = get_last_crawled_domains_metadata(last_onions, date, type=type)
 | |
|     crawler_metadata = get_crawler_splash_status(type)
 | |
| 
 | |
|     date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
 | |
|     return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
 | |
|                             crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
 | |
| 
 | |
| @hiddenServices.route("/crawlers/Crawler_Splash_last_by_type", methods=['GET'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| def Crawler_Splash_last_by_type():
 | |
|     type = request.args.get('type')
 | |
|     # verify user input
 | |
|     if type not in list_types:
 | |
|         type = 'onion'
 | |
|     type_name = dic_type_name[type]
 | |
|     list_domains = []
 | |
| 
 | |
|     now = datetime.datetime.now()
 | |
|     date = now.strftime("%Y%m%d")
 | |
|     date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
 | |
| 
 | |
|     statDomains = get_stats_last_crawled_domains(type, date)
 | |
| 
 | |
|     list_domains = get_last_crawled_domains_metadata(get_last_domains_crawled(type), date, type=type)
 | |
|     crawler_metadata = get_crawler_splash_status(type)
 | |
| 
 | |
|     return render_template("Crawler_Splash_last_by_type.html", type=type, type_name=type_name,
 | |
|                             crawler_enabled=crawler_enabled,
 | |
|                             last_domains=list_domains, statDomains=statDomains,
 | |
|                             crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
 | |
| 
 | |
| @hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| def blacklisted_domains():
 | |
|     blacklist_domain = request.args.get('blacklist_domain')
 | |
|     unblacklist_domain = request.args.get('unblacklist_domain')
 | |
|     type = request.args.get('type')
 | |
|     if type in list_types:
 | |
|         type_name = dic_type_name[type]
 | |
|         if blacklist_domain is not None:
 | |
|             blacklist_domain = int(blacklist_domain)
 | |
|         if unblacklist_domain is not None:
 | |
|             unblacklist_domain = int(unblacklist_domain)
 | |
|         try:
 | |
|             page = int(request.args.get('page'))
 | |
|         except:
 | |
|             page = 1
 | |
|         if page <= 0:
 | |
|             page = 1
 | |
|         nb_page_max = r_serv_onion.scard('blacklist_{}'.format(type))/(1000)
 | |
|         if isinstance(nb_page_max, float):
 | |
|             nb_page_max = int(nb_page_max)+1
 | |
|         if page > nb_page_max:
 | |
|             page = nb_page_max
 | |
|         start = 1000*(page -1)
 | |
|         stop = 1000*page
 | |
| 
 | |
|         list_blacklisted = list(r_serv_onion.smembers('blacklist_{}'.format(type)))
 | |
|         list_blacklisted_1 = list_blacklisted[start:stop]
 | |
|         list_blacklisted_2 = list_blacklisted[stop:stop+1000]
 | |
|         return render_template("blacklisted_domains.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
 | |
|                                 type=type, type_name=type_name, page=page, nb_page_max=nb_page_max,
 | |
|                                 blacklist_domain=blacklist_domain, unblacklist_domain=unblacklist_domain)
 | |
|     else:
 | |
|         return 'Incorrect Type'
 | |
| 
 | |
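
# Sketch of the pagination math used above (illustration only): ceiling
# division gives the number of 1000-item pages, with a minimum of one page.
def _example_nb_page_max(nb_items):  # hypothetical helper, not used by any route
    # 0 -> 1 page, 999 -> 1 page, 1000 -> 1 page, 1001 -> 2 pages
    return max(1, -(-nb_items // 1000))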
| @hiddenServices.route("/crawler/blacklist_domain", methods=['GET'])
 | |
| @login_required
 | |
| @login_analyst
 | |
| def blacklist_domain():
 | |
|     domain = request.args.get('domain')
 | |
|     type = request.args.get('type')
 | |
|     try:
 | |
|         page = int(request.args.get('page'))
 | |
|     except:
 | |
|         page = 1
 | |
|     if type in list_types:
 | |
|         if is_valid_domain(domain):
 | |
|             res = r_serv_onion.sadd('blacklist_{}'.format(type), domain)
 | |
|             if page:
 | |
|                 if res == 0:
 | |
|                     return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=2))
 | |
|                 else:
 | |
|                     return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=1))
 | |
|         else:
 | |
|             return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=0))
 | |
|     else:
 | |
|         return 'Incorrect type'
 | |
| 
 | |
| @hiddenServices.route("/crawler/unblacklist_domain", methods=['GET'])
 | |
| @login_required
 | |
| @login_analyst
 | |
| def unblacklist_domain():
 | |
|     domain = request.args.get('domain')
 | |
|     type = request.args.get('type')
 | |
|     try:
 | |
|         page = int(request.args.get('page'))
 | |
|     except:
 | |
|         page = 1
 | |
|     if type in list_types:
 | |
|         if is_valid_domain(domain):
 | |
|             res = r_serv_onion.srem('blacklist_{}'.format(type), domain)
 | |
|             if page:
 | |
|                 if res == 0:
 | |
|                     return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=2))
 | |
|                 else:
 | |
|                     return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=1))
 | |
|         else:
 | |
|             return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=0))
 | |
|     else:
 | |
|         return 'Incorrect type'
 | |
| 
 | |
| @hiddenServices.route("/crawlers/auto_crawler", methods=['GET'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| def auto_crawler():
 | |
|     nb_element_to_display = 100
 | |
|     try:
 | |
|         page = int(request.args.get('page'))
 | |
|     except:
 | |
|         page = 1
 | |
|     if page <= 0:
 | |
|         page = 1
 | |
| 
 | |
|     nb_auto_onion = r_serv_onion.scard('auto_crawler_url:onion')
 | |
|     nb_auto_regular = r_serv_onion.scard('auto_crawler_url:regular')
 | |
| 
 | |
|     if nb_auto_onion > nb_auto_regular:
 | |
|         nb_max = nb_auto_onion
 | |
|     else:
 | |
|         nb_max = nb_auto_regular
 | |
| 
 | |
|     nb_page_max = nb_max/(nb_element_to_display)
 | |
|     if isinstance(nb_page_max, float):
 | |
|         nb_page_max = int(nb_page_max)+1
 | |
|     if page > nb_page_max:
 | |
|         page = nb_page_max
 | |
|     start = nb_element_to_display*(page -1)
 | |
|     stop = nb_element_to_display*page
 | |
| 
 | |
|     last_auto_crawled = get_last_domains_crawled('auto_crawled')
 | |
|     last_domains = get_last_crawled_domains_metadata(last_auto_crawled, '')
 | |
| 
 | |
|     if start > nb_auto_onion:
 | |
|         auto_crawler_domain_onions = []
 | |
|     elif stop > nb_auto_onion:
 | |
|         auto_crawler_domain_onions = list(r_serv_onion.smembers('auto_crawler_url:onion'))[start:nb_auto_onion]
 | |
|     else:
 | |
|         auto_crawler_domain_onions = list(r_serv_onion.smembers('auto_crawler_url:onion'))[start:stop]
 | |
| 
 | |
|     if start > nb_auto_regular:
 | |
|         auto_crawler_domain_regular = []
 | |
|     elif stop > nb_auto_regular:
 | |
|         auto_crawler_domain_regular = list(r_serv_onion.smembers('auto_crawler_url:regular'))[start:nb_auto_regular]
 | |
|     else:
 | |
|         auto_crawler_domain_regular = list(r_serv_onion.smembers('auto_crawler_url:regular'))[start:stop]
 | |
| 
 | |
|     auto_crawler_domain_onions_metadata = get_last_crawled_domains_metadata(auto_crawler_domain_onions, '', type='onion', auto_mode=True)
 | |
|     auto_crawler_domain_regular_metadata = get_last_crawled_domains_metadata(auto_crawler_domain_regular, '', type='regular', auto_mode=True)
 | |
| 
 | |
|     return render_template("Crawler_auto.html", page=page, nb_page_max=nb_page_max,
 | |
|                                 last_domains=last_domains,
 | |
|                                 crawler_enabled=crawler_enabled,
 | |
|                                 auto_crawler_domain_onions_metadata=auto_crawler_domain_onions_metadata,
 | |
|                                 auto_crawler_domain_regular_metadata=auto_crawler_domain_regular_metadata)
 | |
| 
 | |
| @hiddenServices.route("/crawlers/remove_auto_crawler", methods=['GET'])
 | |
| @login_required
 | |
| @login_analyst
 | |
| def remove_auto_crawler():
 | |
|     url = request.args.get('url')
 | |
|     page = request.args.get('page')
 | |
| 
 | |
|     if url:
 | |
|         delete_auto_crawler(url)
 | |
|     return redirect(url_for('hiddenServices.auto_crawler', page=page))
 | |
| 
 | |
| @hiddenServices.route("/crawlers/crawler_dashboard_json", methods=['GET'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| def crawler_dashboard_json():
 | |
| 
 | |
|     crawler_metadata_onion = get_crawler_splash_status('onion')
 | |
|     crawler_metadata_regular = get_crawler_splash_status('regular')
 | |
| 
 | |
|     now = datetime.datetime.now()
 | |
|     date = now.strftime("%Y%m%d")
 | |
| 
 | |
|     statDomains_onion = get_stats_last_crawled_domains('onion', date)
 | |
|     statDomains_regular = get_stats_last_crawled_domains('regular', date)
 | |
| 
 | |
|     return jsonify({'statDomains_onion': statDomains_onion, 'statDomains_regular': statDomains_regular,
 | |
|                         'crawler_metadata_onion':crawler_metadata_onion, 'crawler_metadata_regular':crawler_metadata_regular})
 | |
| 
 | |
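
# Client-side sketch (illustration only; the base URL and login handling are
# assumptions, since AIL requires an authenticated session): the dashboard
# page polls this endpoint for crawler status and per-day domain statistics.
def _example_poll_dashboard_json(session, base_url='http://127.0.0.1:7000'):
    # 'session' is a pre-authenticated requests.Session (hypothetical)
    resp = session.get(base_url + '/crawlers/crawler_dashboard_json')
    data = resp.json()
    return data['statDomains_onion'], data['crawler_metadata_onion']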

# # TODO: refactor
@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
@login_required
@login_read_only
def last_crawled_domains_with_stats_json():
    last_onions = r_serv_onion.lrange('last_onion', 0, -1)
    list_onion = []

    now = datetime.datetime.now()
    date = now.strftime("%Y%m%d")
    statDomains = {}
    statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
    statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
    statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')

    for onion in last_onions:
        metadata_onion = {}
        metadata_onion['domain'] = onion
        metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
        if metadata_onion['last_check'] is None:
            metadata_onion['last_check'] = '********'
        metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
        if metadata_onion['first_seen'] is None:
            metadata_onion['first_seen'] = '********'
        if get_onion_status(onion, metadata_onion['last_check']):
            metadata_onion['status_text'] = 'UP'
            metadata_onion['status_color'] = 'Green'
            metadata_onion['status_icon'] = 'fa-check-circle'
        else:
            metadata_onion['status_text'] = 'DOWN'
            metadata_onion['status_color'] = 'Red'
            metadata_onion['status_icon'] = 'fa-times-circle'
        list_onion.append(metadata_onion)

    # note: duplicates get_crawler_splash_status() but reads the
    # 'all_crawler:onion' key instead of 'onion_crawlers'
    crawler_metadata = []
    all_onion_crawler = r_cache.smembers('all_crawler:onion')
    for crawler in all_onion_crawler:
        crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
        started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
        status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
        crawler_info = '{}  - {}'.format(crawler, started_time)
        status = status_info == 'Waiting' or status_info == 'Crawling'
        crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})

    return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata': crawler_metadata})
| @hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| def get_onions_by_daterange():
 | |
|     date_from = request.form.get('date_from')
 | |
|     date_to = request.form.get('date_to')
 | |
|     service_type = request.form.get('service_type')
 | |
|     domains_up = request.form.get('domains_up')
 | |
|     domains_down = request.form.get('domains_down')
 | |
|     domains_tags = request.form.get('domains_tags')
 | |
| 
 | |
|     return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, service_type=service_type, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags))
 | |
| 
 | |
| @hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| def show_domains_by_daterange():
 | |
|     date_from = request.args.get('date_from')
 | |
|     date_to = request.args.get('date_to')
 | |
|     service_type = request.args.get('service_type')
 | |
|     domains_up = request.args.get('domains_up')
 | |
|     domains_down = request.args.get('domains_down')
 | |
|     domains_tags = request.args.get('domains_tags')
 | |
| 
 | |
|     # incorrect service type
 | |
|     if not is_valid_service_type(service_type):
 | |
|         service_type = 'onion'
 | |
| 
 | |
|     type_name = dic_type_name[service_type]
 | |
| 
 | |
|     date_range = []
 | |
|     if date_from is not None and date_to is not None:
 | |
|         #change format
 | |
|         try:
 | |
|             if len(date_from) != 8:
 | |
|                 date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
 | |
|                 date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
 | |
|             date_range = substract_date(date_from, date_to)
 | |
|         except:
 | |
|             pass
 | |
| 
 | |
|     if not date_range:
 | |
|         date_range.append(datetime.date.today().strftime("%Y%m%d"))
 | |
|         date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8]
 | |
|         date_to = date_from
 | |
| 
 | |
|     else:
 | |
|         date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8]
 | |
|         date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8]
 | |
| 
 | |
|     statDomains = {}
 | |
|     statDomains['domains_up'] = 0
 | |
|     statDomains['domains_down'] = 0
 | |
|     statDomains['total'] = 0
 | |
|     statDomains['domains_queue'] = get_nb_domains_inqueue(service_type)
 | |
| 
 | |
|     domains_by_day = {}
 | |
|     domain_metadata = {}
 | |
|     stats_by_date = {}
 | |
|     for date in date_range:
 | |
|         stats_by_date[date] = {}
 | |
|         stats_by_date[date]['domain_up'] = 0
 | |
|         stats_by_date[date]['domain_down'] = 0
 | |
|         if domains_up:
 | |
|             domains_up = True
 | |
|             domains_by_day[date] = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date)))
 | |
|             for domain in domains_by_day[date]:
 | |
|                 h = HiddenServices(domain, 'onion')
 | |
|                 domain_metadata[domain] = {}
 | |
|                 if domains_tags:
 | |
|                     domains_tags = True
 | |
|                     domain_metadata[domain]['tags'] = h.get_domain_tags(update=True)
 | |
| 
 | |
|                 domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
 | |
|                 if domain_metadata[domain]['last_check'] is None:
 | |
|                     domain_metadata[domain]['last_check'] = '********'
 | |
|                 domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
 | |
|                 if domain_metadata[domain]['first_seen'] is None:
 | |
|                     domain_metadata[domain]['first_seen'] = '********'
 | |
|                 domain_metadata[domain]['status_text'] = 'UP'
 | |
|                 domain_metadata[domain]['status_color'] = 'Green'
 | |
|                 domain_metadata[domain]['status_icon'] = 'fa-check-circle'
 | |
|                 statDomains['domains_up'] += 1
 | |
|                 stats_by_date[date]['domain_up'] += 1
 | |
| 
 | |
|         if domains_down:
 | |
|             domains_down = True
 | |
|             domains_by_day_down = list(r_serv_onion.smembers('{}_down:{}'.format(service_type, date)))
 | |
|             if domains_up:
 | |
|                 domains_by_day[date].extend(domains_by_day_down)
 | |
|             else:
 | |
|                 domains_by_day[date] = domains_by_day_down
 | |
|             for domain in domains_by_day_down:
 | |
|                 #h = HiddenServices(onion_domain, 'onion')
 | |
|                 domain_metadata[domain] = {}
 | |
|                 #domain_metadata[domain]['tags'] = h.get_domain_tags()
 | |
| 
 | |
|                 domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
 | |
|                 if domain_metadata[domain]['last_check'] is None:
 | |
|                     domain_metadata[domain]['last_check'] = '********'
 | |
|                 domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
 | |
|                 if domain_metadata[domain]['first_seen'] is None:
 | |
|                     domain_metadata[domain]['first_seen'] = '********'
 | |
| 
 | |
|                 domain_metadata[domain]['status_text'] = 'DOWN'
 | |
|                 domain_metadata[domain]['status_color'] = 'Red'
 | |
|                 domain_metadata[domain]['status_icon'] = 'fa-times-circle'
 | |
|                 statDomains['domains_down'] += 1
 | |
|                 stats_by_date[date]['domain_down'] += 1
 | |
| 
 | |
|         statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
 | |
| 
 | |
|     return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day,
 | |
|                                 statDomains=statDomains, type_name=type_name,
 | |
|                                 domain_metadata=domain_metadata,
 | |
|                                 stats_by_date=stats_by_date,
 | |
|                                 date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
 | |
|                                 domains_tags=domains_tags, type=service_type, bootstrap_label=bootstrap_label)
 | |
| 
 | |
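
# Shape sketch (illustration only, hypothetical values) of the per-day
# counters built by show_domains_by_daterange() for the template:
def _example_stats_by_date():  # hypothetical helper, not used by any route
    return {'20200304': {'domain_up': 7, 'domain_down': 2},
            '20200305': {'domain_up': 5, 'domain_down': 0}}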
| @hiddenServices.route("/crawlers/download_domain", methods=['GET'])
 | |
| @login_required
 | |
| @login_read_only
 | |
| @no_cache
 | |
| def download_domain():
 | |
|     domain = request.args.get('domain')
 | |
|     epoch = request.args.get('epoch')
 | |
|     try:
 | |
|         epoch = int(epoch)
 | |
|     except:
 | |
|         epoch = None
 | |
|     port = request.args.get('port')
 | |
|     faup.decode(domain)
 | |
|     unpack_url = faup.get()
 | |
| 
 | |
|     ## TODO: # FIXME: remove me
 | |
|     try:
 | |
|         domain = unpack_url['domain'].decode()
 | |
|     except:
 | |
|         domain = unpack_url['domain']
 | |
| 
 | |
|     if not port:
 | |
|         if unpack_url['port']:
 | |
|             try:
 | |
|                 port = unpack_url['port'].decode()
 | |
|             except:
 | |
|                 port = unpack_url['port']
 | |
|         else:
 | |
|             port = 80
 | |
|     try:
 | |
|         port = int(port)
 | |
|     except:
 | |
|         port = 80
 | |
|     type = get_type_domain(domain)
 | |
|     if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(type, domain)):
 | |
|         return '404'
 | |
|         # # TODO: FIXME return 404
 | |
| 
 | |
|     origin_paste = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'paste_parent')
 | |
| 
 | |
|     h = HiddenServices(domain, type, port=port)
 | |
|     item_core = h.get_domain_crawled_core_item(epoch=epoch)
 | |
|     if item_core:
 | |
|         l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
 | |
|     else:
 | |
|         l_pastes = []
 | |
|     #dict_links = h.get_all_links(l_pastes)
 | |
| 
 | |
|     zip_file = h.create_domain_basic_archive(l_pastes)
 | |
|     filename = domain + '.zip'
 | |
| 
 | |
|     return send_file(zip_file, attachment_filename=filename, as_attachment=True)
 | |
| 
 | |
| 
 | |
| @hiddenServices.route("/hiddenServices/onion_son", methods=['GET'])
 | |
| @login_required
 | |
| @login_analyst
 | |
| def onion_son():
 | |
|     onion_domain = request.args.get('onion_domain')
 | |
| 
 | |
|     h = HiddenServices(onion_domain, 'onion')
 | |
|     l_pastes = h.get_last_crawled_pastes()
 | |
|     l_son = h.get_domain_son(l_pastes)
 | |
|     return 'l_son'
 | |
| 
 | |

# ============= JSON ==============
@hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET'])
@login_required
@login_read_only
def domain_crawled_7days_json():
    type = 'onion'
    ## TODO: # FIXME: 404 error

    date_range = get_date_range(7)
    json_domain_stats = []
    for date in date_range:
        nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
        nb_domain_down = r_serv_onion.scard('{}_down:{}'.format(type, date))
        date = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
        json_domain_stats.append({'date': date, 'value': int(nb_domain_up), 'nb_domain_down': int(nb_domain_down)})

    return jsonify(json_domain_stats)

@hiddenServices.route('/hiddenServices/domain_crawled_by_type_json')
@login_required
@login_read_only
def domain_crawled_by_type_json():
    current_date = request.args.get('date')
    type = request.args.get('type')
    if type in list_types:

        num_day_type = 7
        date_range = get_date_range(num_day_type)
        range_decoder = []
        for date in date_range:
            day_crawled = {}
            day_crawled['date'] = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
            day_crawled['UP'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
            day_crawled['DOWN'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
            range_decoder.append(day_crawled)

        return jsonify(range_decoder)

    else:
        return jsonify('Incorrect Type')
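
# Shape sketch (illustration only, hypothetical values) of one row returned
# by domain_crawled_by_type_json, as consumed by the dashboard chart:
def _example_domain_crawled_row():  # hypothetical helper, not used by any route
    return {'date': '2020-03-05', 'UP': 12, 'DOWN': 3}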

# ========= REGISTRATION =========
app.register_blueprint(hiddenServices, url_prefix=baseUrl)
 |