diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py
index dee511dc..64bbdafb 100644
--- a/var/www/modules/hiddenServices/Flask_hiddenServices.py
+++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py
@@ -30,6 +30,8 @@ PASTES_FOLDER = Flask_config.PASTES_FOLDER
hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')
faup = Faup()
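+# supported crawler service types and their display names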
+list_types = ['onion', 'regular']
+dic_type_name = {'onion': 'Onion', 'regular': 'Website'}
# ============ FUNCTIONS ============
def one():
@@ -72,10 +74,10 @@ def unpack_paste_tags(p_tags):
l_tags.append( (tag, complete_tag) )
return l_tags
-def is_valid_onion_domain(onion_domain):
- faup.decode(onion_domain)
+def is_valid_domain(domain):
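+ # valid means a bare domain: a recognised TLD and no scheme, port or query string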
+ faup.decode(domain)
domain_unpack = faup.get()
- if domain_unpack['tld']==b'onion' and domain_unpack['scheme'] is None and domain_unpack['port'] is None and domain_unpack['query_string'] is None:
+ if domain_unpack['tld'] is not None and domain_unpack['scheme'] is None and domain_unpack['port'] is None and domain_unpack['query_string'] is None:
return True
else:
return False
@@ -93,6 +95,18 @@ def get_domain_type(domain):
else:
return 'regular'
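+# most recently crawled domains for a service type (the 'last_<type>' Redis list)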
+def get_last_domains_crawled(type):
+ return r_serv_onion.lrange('last_{}'.format(type), 0, -1)
+
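+# up/down counters for the given date plus the current crawler queue sizes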
+def get_stats_last_crawled_domains(type, date):
+ statDomains = {}
+ statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
+ statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
+ statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
+ statDomains['domains_queue'] = r_serv_onion.scard('{}_crawler_queue'.format(type))
+ statDomains['domains_queue'] += r_serv_onion.scard('{}_crawler_priority_queue'.format(type))
+ return statDomains
+
def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None):
list_crawled_metadata = []
for domain_epoch in list_domains_crawled:
@@ -153,6 +167,10 @@ def send_url_to_crawl_in_queue(mode, service_type, url):
# ============= ROUTES ==============
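+# landing page for the crawler dashboard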
+@hiddenServices.route("/crawlers/", methods=['GET'])
+def dashboard():
+ return render_template("Crawler_dashboard.html")
+
@hiddenServices.route("/hiddenServices/2", methods=['GET'])
def hiddenServices_page_test():
return render_template("Crawler_index.html")
@@ -163,124 +181,115 @@ def manual():
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
def crawler_splash_onion():
- last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
+ type = 'onion'
+ last_onions = get_last_domains_crawled(type)
list_onion = []
now = datetime.datetime.now()
date = now.strftime("%Y%m%d")
- statDomains = {}
- statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
- statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
- statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
- statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
+ statDomains = get_stats_last_crawled_domains(type, date)
- list_onion = get_last_crawled_domains_metadata(last_onions, date, type='onion')
- crawler_metadata = get_crawler_splash_status('onion')
+ list_onion = get_last_crawled_domains_metadata(last_onions, date, type=type)
+ crawler_metadata = get_crawler_splash_status(type)
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
-@hiddenServices.route("/crawlers/manual_splash_crawler", methods=['GET'])
-def manual_splash_crawler():
+@hiddenServices.route("/crawlers/crawler_splash_regular", methods=['GET'])
+def crawler_splash_regular():
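+ # regular (clearnet) counterpart of crawler_splash_onion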
+ type = 'regular'
+ type_name = dic_type_name[type]
+ list_domains = []
now = datetime.datetime.now()
- date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
-
- # Stats
- # user request == CHECK
- # preconf crawlers == ?????
- #################################################################################
- statDomains = {}
- #statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
- #statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
- #statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
- #statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
- ####################################################################################
-
- last_crawled = r_serv_onion.lrange('last_crawled_manual', 0 ,-1)
- list_crawled = get_last_crawled_domains_metadata(last_crawled)
-
- crawler_metadata=[]
- all_onion_crawler = r_cache.smembers('all_crawler:onion')
- for crawler in all_onion_crawler:
- crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
- started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
- status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
- crawler_info = '{} - {}'.format(crawler, started_time)
- if status_info=='Waiting' or status_info=='Crawling':
- status=True
- else:
- status=False
- crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
-
+ date = now.strftime("%Y%m%d")
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
- return render_template("Crawler_Splash_onion.html", last_crawled=list_crawled, statDomains=statDomains,
+
+ statDomains = get_stats_last_crawled_domains(type, date)
+
+ list_domains = get_last_crawled_domains_metadata(get_last_domains_crawled(type), date, type=type)
+ crawler_metadata = get_crawler_splash_status(type)
+
+ return render_template("Crawler_Splash_last_by_type.html", type=type, type_name=type_name,
+ last_domains=list_domains, statDomains=statDomains,
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
-@hiddenServices.route("/crawlers/blacklisted_onion", methods=['GET'])
-def blacklisted_onion():
- blacklist_onion = request.args.get('blacklist_onion')
- unblacklist_onion = request.args.get('unblacklist_onion')
- if blacklist_onion is not None:
- blacklist_onion = int(blacklist_onion)
- if unblacklist_onion is not None:
- unblacklist_onion = int(unblacklist_onion)
- try:
- page = int(request.args.get('page'))
- except:
- page = 1
- if page <= 0:
- page = 1
- nb_page_max = r_serv_onion.scard('blacklist_onion')/(1000)
- if isinstance(nb_page_max, float):
- nb_page_max = int(nb_page_max)+1
- if page > nb_page_max:
- page = nb_page_max
- start = 1000*(page -1)
- stop = 1000*page
+@hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET'])
+def blacklisted_domains():
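+ # paginated view of the 'blacklist_<type>' set, 1000 domains per page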
+ blacklist_domain = request.args.get('blacklist_domain')
+ unblacklist_domain = request.args.get('unblacklist_domain')
+ type = request.args.get('type')
+ if type in list_types:
+ type_name = dic_type_name[type]
+ if blacklist_domain is not None:
+ blacklist_domain = int(blacklist_domain)
+ if unblacklist_domain is not None:
+ unblacklist_domain = int(unblacklist_domain)
+ try:
+ page = int(request.args.get('page'))
+ except:
+ page = 1
+ if page <= 0:
+ page = 1
+ nb_page_max = r_serv_onion.scard('blacklist_{}'.format(type))/(1000)
+ if isinstance(nb_page_max, float):
+ nb_page_max = int(nb_page_max)+1
+ if page > nb_page_max:
+ page = nb_page_max
+ start = 1000*(page -1)
+ stop = 1000*page
- list_blacklisted = list(r_serv_onion.smembers('blacklist_onion'))
- list_blacklisted_1 = list_blacklisted[start:stop]
- list_blacklisted_2 = list_blacklisted[stop:stop+1000]
- return render_template("blacklisted_onion.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
- page=page, nb_page_max=nb_page_max,
- blacklist_onion=blacklist_onion, unblacklist_onion=unblacklist_onion)
-
-@hiddenServices.route("/crawler/blacklist_onion", methods=['GET'])
-def blacklist_onion():
- onion = request.args.get('onion')
- try:
- page = int(request.args.get('page'))
- except:
- page = 1
- if is_valid_onion_domain(onion):
- res = r_serv_onion.sadd('blacklist_onion', onion)
- print(res)
- if page:
- if res == 0:
- return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=2))
- else:
- return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=1))
+ list_blacklisted = list(r_serv_onion.smembers('blacklist_{}'.format(type)))
+ list_blacklisted_1 = list_blacklisted[start:stop]
+ list_blacklisted_2 = list_blacklisted[stop:stop+1000]
+ return render_template("blacklisted_domains.html", list_blacklisted_1=list_blacklisted_1, list_blacklisted_2=list_blacklisted_2,
+ type=type, type_name=type_name, page=page, nb_page_max=nb_page_max,
+ blacklist_domain=blacklist_domain, unblacklist_domain=unblacklist_domain)
else:
- return redirect(url_for('hiddenServices.blacklisted_onion', page=page, blacklist_onion=0))
+ return 'Incorrect type'
-@hiddenServices.route("/crawler/unblacklist_onion", methods=['GET'])
-def unblacklist_onion():
- onion = request.args.get('onion')
+@hiddenServices.route("/crawler/blacklist_domain", methods=['GET'])
+def blacklist_domain():
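+ # add a domain to the 'blacklist_<type>' set,
+ # e.g. GET /crawler/blacklist_domain?domain=example.onion&type=onion&page=1
+ # redirects with blacklist_domain=1 (added), 2 (already blacklisted) or 0 (invalid domain)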
+ domain = request.args.get('domain')
+ type = request.args.get('type')
try:
page = int(request.args.get('page'))
except:
page = 1
- if is_valid_onion_domain(onion):
- res = r_serv_onion.srem('blacklist_onion', onion)
- if page:
- if res == 0:
- return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=2))
- else:
- return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=1))
+ if type in list_types:
+ if is_valid_domain(domain):
+ res = r_serv_onion.sadd('blacklist_{}'.format(type), domain)
+ if page:
+ if res == 0:
+ return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=2))
+ else:
+ return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=1))
+ else:
+ return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, blacklist_domain=0))
else:
- return redirect(url_for('hiddenServices.blacklisted_onion', page=page, unblacklist_onion=0))
+ return 'Incorrect type'
+
+@hiddenServices.route("/crawler/unblacklist_domain", methods=['GET'])
+def unblacklist_domain():
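+ # remove a domain from the 'blacklist_<type>' set;
+ # redirects with unblacklist_domain=1 (removed), 2 (not blacklisted) or 0 (invalid domain)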
+ domain = request.args.get('domain')
+ type = request.args.get('type')
+ try:
+ page = int(request.args.get('page'))
+ except:
+ page = 1
+ if type in list_types:
+ if is_valid_domain(domain):
+ res = r_serv_onion.srem('blacklist_{}'.format(type), domain)
+ if page:
+ if res == 0:
+ return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=2))
+ else:
+ return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=1))
+ else:
+ return redirect(url_for('hiddenServices.blacklisted_domains', page=page, type=type, unblacklist_domain=0))
+ else:
+ return 'Incorrect type'
@hiddenServices.route("/crawlers/create_spider_splash", methods=['POST'])
def create_spider_splash():
@@ -619,23 +628,26 @@ def domain_crawled_7days_json():
return jsonify(json_domain_stats)
-@hiddenServices.route('/hiddenServices/automatic_onion_crawler_json')
-def automatic_onion_crawler_json():
+@hiddenServices.route('/hiddenServices/domain_crawled_by_type_json')
+def domain_crawled_by_type_json():
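+ # JSON time series: UP/DOWN crawl counts per day over the last 7 days for the requested type,
+ # e.g. GET /hiddenServices/domain_crawled_by_type_json?type=onion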
current_date = request.args.get('date')
- type = 'onion'
+ type = request.args.get('type')
+ if type in list_types:
- num_day_type = 7
- date_range = get_date_range(num_day_type)
- range_decoder = []
- for date in date_range:
- day_crawled = {}
- day_crawled['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8]
- day_crawled['UP']= nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
- day_crawled['DOWN']= nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
- range_decoder.append(day_crawled)
+ num_day_type = 7
+ date_range = get_date_range(num_day_type)
+ range_decoder = []
+ for date in date_range:
+ day_crawled = {}
+ day_crawled['date'] = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
+ day_crawled['UP'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
+ day_crawled['DOWN'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
+ range_decoder.append(day_crawled)
- return jsonify(range_decoder)
+ return jsonify(range_decoder)
+ else:
+ return jsonify('Incorrect type')
# ========= REGISTRATION =========
app.register_blueprint(hiddenServices, url_prefix=baseUrl)
diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
new file mode 100644
index 00000000..c59bc9ab
--- /dev/null
+++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_last_by_type.html
@@ -0,0 +1,476 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>AIL-Framework</title>
+    <!-- head block: meta tags, CSS and JS includes -->
+</head>
+<body>
+
+    {% include 'nav_bar.html' %}
+
+    <div class="container-fluid">
+        <div class="row">
+
+            {% include 'crawler/menu_sidebar.html' %}
+
+            <div id="core_content" class="col-12 col-lg-10">
+
+                <table class="table">
+                    <thead>
+                        <tr>
+                            <th>Domain</th>
+                            <th>First Seen</th>
+                            <th>Last Check</th>
+                            <th>Status</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                    {% for metadata_domain in last_domains %}
+                        <tr>
+                            <td>{{ metadata_domain['domain'] }}</td>
+                            <td>{{'{}/{}/{}'.format(metadata_domain['first_seen'][0:4], metadata_domain['first_seen'][4:6], metadata_domain['first_seen'][6:8])}}</td>
+                            <td>{{'{}/{}/{}'.format(metadata_domain['last_check'][0:4], metadata_domain['last_check'][4:6], metadata_domain['last_check'][6:8])}}</td>
+                            <td>{{metadata_domain['status_text']}}</td>
+                        </tr>
+                    {% endfor %}
+                    </tbody>
+                </table>
+
+                <!-- statDomains panels: domains up / down / in queue / total -->
+
+                <div class="card">
+                    <div class="card-body">
+                        <h5 class="card-title">Select domains by date range :</h5>
+                        <p class="card-text">Some quick example text to build on the card title and make up the bulk of the card's content.</p>
+                        <!-- date-from / date-to inputs -->
+                    </div>
+                </div>
+
+                <table class="table">
+                    <tbody>
+                    {% for crawler in crawler_metadata %}
+                        <tr>
+                            <td>{{crawler['crawler_info']}}</td>
+                            <td>{{crawler['crawling_domain']}}</td>
+                            <td>{{crawler['status_info']}}</td>
+                        </tr>
+                    {% endfor %}
+                    </tbody>
+                </table>
+
+            </div>
+        </div>
+    </div>
+
+    <!-- page scripts: sidebar toggle, table init, date range picker -->
+</body>
+</html>
diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html
index 9647d733..bc2e6024 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_manual.html
@@ -26,40 +26,9 @@
-            <!-- inline sidebar markup -->
+            {% include 'crawler/menu_sidebar.html' %}
@@ -162,6 +131,7 @@
var chart = {};
$(document).ready(function(){
$("#page-Crawler").addClass("active");
+ $("#nav_manual_crawler").addClass("active");
manual_crawler_input_controler();
$('#crawler_type').change(function () {
@@ -169,6 +139,20 @@ $(document).ready(function(){
});
});
+function toggle_sidebar(){
+ if($('#nav_menu').is(':visible')){
+ $('#nav_menu').hide();
+ $('#side_menu').removeClass('border-right col-lg-2');
+ $('#core_content').removeClass('col-lg-10');
+ }else{
+ $('#nav_menu').show();
+ $('#side_menu').addClass('border-right col-lg-2');
+ $('#core_content').addClass('col-lg-10');
+ }
+}
+
function manual_crawler_input_controler() {
if($('#crawler_type').is(':checked')){
$("#crawler_epoch_input").show();
diff --git a/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html
index e6c4f697..1e1a1b7a 100644
--- a/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html
+++ b/var/www/modules/hiddenServices/templates/Crawler_Splash_onion.html
@@ -50,42 +50,7 @@
-            <!-- inline sidebar markup -->
+            {% include 'crawler/menu_sidebar.html' %}
@@ -224,6 +189,7 @@
var chart = {};
$(document).ready(function(){
$("#page-Crawler").addClass("active");
+ $("#nav_onion_crawler").addClass("active");
$('#date-range-from').dateRangePicker({
separator : ' to ',
diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
new file mode 100644
index 00000000..faccf26a
--- /dev/null
+++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html
@@ -0,0 +1,82 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>AIL-Framework</title>
+    <!-- head block: meta tags, CSS and JS includes -->
+</head>
+<body>
+
+    {% include 'nav_bar.html' %}
+
+    <div class="container-fluid">
+        <!-- dashboard panels (placeholders in this revision) -->
+        --------------
+
+        --------------
+    </div>
+
+</body>
+</html>
diff --git a/var/www/modules/hiddenServices/templates/blacklisted_onion.html b/var/www/modules/hiddenServices/templates/blacklisted_domains.html
similarity index 56%
rename from var/www/modules/hiddenServices/templates/blacklisted_onion.html
rename to var/www/modules/hiddenServices/templates/blacklisted_domains.html
index 501a6c62..c1a28406 100644
--- a/var/www/modules/hiddenServices/templates/blacklisted_onion.html
+++ b/var/www/modules/hiddenServices/templates/blacklisted_domains.html
@@ -24,45 +24,14 @@
-            <!-- inline sidebar markup -->
+            {% include 'crawler/menu_sidebar.html' %}
@@ -70,38 +39,38 @@
-            Blacklist Onion
+            Blacklist {{type_name}}
- {%if blacklist_onion==2 %}
- This Onion is already blacklisted
+ {%if blacklist_domain==2 %}
+ This {{type_name}} is already blacklisted
{% else %}
- Incorrect Onion address
+ Incorrect {{type_name}} address
{% endif %}
- Onion Blacklisted
+ {{type_name}} Blacklisted
-            Unblacklist Onion
+            Unblacklist {{type_name}}
- {%if unblacklist_onion==2 %}
- This Onion is not blacklisted
+ {%if unblacklist_domain==2 %}
+ This {{type_name}} is not blacklisted
{% else %}
- Incorrect Onion address
+ Incorrect {{type_name}} address
{% endif %}
- Onion Unblacklisted
+ {{type_name}} Unblacklisted
@@ -112,17 +81,17 @@
-                <th>Onion</th>
-                <th>Unblacklist Onion</th>
+                <th>{{type_name}}</th>
+                <th>Unblacklist {{type_name}}</th>
-            {% for onion in list_blacklisted_1 %}
+            {% for domain in list_blacklisted_1 %}
-                <td>{{onion}}</td>
+                <td>{{domain}}</td>
-                <td><!-- unblacklist link --></td>
+                <td><!-- unblacklist link --></td>
@@ -134,17 +103,17 @@
-                <th>Onion</th>
-                <th>Unblacklist Onion</th>
+                <th>{{type_name}}</th>
+                <th>Unblacklist {{type_name}}</th>
-            {% for onion in list_blacklisted_2 %}
+            {% for domain in list_blacklisted_2 %}
-                <td>{{onion}}</td>
+                <td>{{domain}}</td>
-                <td><!-- unblacklist link --></td>
+                <td><!-- unblacklist link --></td>
@@ -162,34 +131,34 @@
@@ -220,6 +189,20 @@ $(document).ready(function(){
"order": [[ 0, "asc" ]]
}
);
-
+ $("#page-Crawler").addClass("active");
});
+
+function toggle_sidebar(){
+ if($('#nav_menu').is(':visible')){
+ $('#nav_menu').hide();
+ $('#side_menu').removeClass('border-right col-lg-2');
+ $('#core_content').removeClass('col-lg-10');
+ }else{
+ $('#nav_menu').show();
+ $('#side_menu').addClass('border-right col-lg-2');
+ $('#core_content').addClass('col-lg-10');
+ }
+}
diff --git a/var/www/templates/crawler/menu_sidebar.html b/var/www/templates/crawler/menu_sidebar.html
new file mode 100644
index 00000000..fd2f7c45
--- /dev/null
+++ b/var/www/templates/crawler/menu_sidebar.html
@@ -0,0 +1,48 @@
+<!-- shared crawler sidebar: #side_menu / #nav_menu wrapper with links to the
+     dashboard, onion, regular and manual crawler pages
+     (#nav_onion_crawler, #nav_manual_crawler, ...) -->