mirror of https://github.com/CIRCL/AIL-framework
chg: [UI] basic navbar + sidebar + refractor
parent
516238025f
commit
c2885589cf
118
bin/Crawler.py
118
bin/Crawler.py
|
@ -16,6 +16,47 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
from Helper import Process
|
||||
from pubsublogger import publisher
|
||||
|
||||
# ======== GLOBAL VARIABLES ========
|
||||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
|
||||
config_section = 'Crawler'
|
||||
|
||||
# Setup the I/O queues
|
||||
p = Process(config_section)
|
||||
|
||||
accepted_services = ['onion', 'regular']
|
||||
|
||||
dic_regex = {}
|
||||
dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(dic_regex['onion'])
|
||||
dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(dic_regex['i2p'])
|
||||
dic_regex['regular'] = dic_regex['i2p']
|
||||
|
||||
faup = Faup()
|
||||
|
||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
|
||||
|
||||
r_serv_metadata = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Metadata", "host"),
|
||||
port=p.config.getint("ARDB_Metadata", "port"),
|
||||
db=p.config.getint("ARDB_Metadata", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_cache = redis.StrictRedis(
|
||||
host=p.config.get("Redis_Cache", "host"),
|
||||
port=p.config.getint("Redis_Cache", "port"),
|
||||
db=p.config.getint("Redis_Cache", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_onion = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Onion", "host"),
|
||||
port=p.config.getint("ARDB_Onion", "port"),
|
||||
db=p.config.getint("ARDB_Onion", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
# ======== FUNCTIONS ========
|
||||
def decode_val(value):
|
||||
if value is not None:
|
||||
value = value.decode()
|
||||
|
@ -105,7 +146,7 @@ def crawl_onion(url, domain, date, date_month, message):
|
|||
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling')
|
||||
exit(1)
|
||||
|
||||
|
||||
# ======== MAIN ========
|
||||
if __name__ == '__main__':
|
||||
|
||||
if len(sys.argv) != 3:
|
||||
|
@ -119,83 +160,38 @@ if __name__ == '__main__':
|
|||
if mode == 'automatic':
|
||||
type_hidden_service = 'onion'
|
||||
|
||||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
|
||||
publisher.info("Script Crawler started")
|
||||
|
||||
config_section = 'Crawler'
|
||||
|
||||
# Setup the I/O queues
|
||||
p = Process(config_section)
|
||||
|
||||
accepted_services = ['onion', 'regular']
|
||||
|
||||
dic_regex = {}
|
||||
dic_regex['onion'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(dic_regex['onion'])
|
||||
dic_regex['i2p'] = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(dic_regex['i2p'])
|
||||
dic_regex['regular'] = dic_regex['i2p']
|
||||
|
||||
|
||||
url_onion = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(url_onion)
|
||||
url_i2p = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(url_i2p)
|
||||
|
||||
if type_hidden_service == 'onion':
|
||||
regex_hidden_service = url_onion
|
||||
elif type_hidden_service == 'i2p':
|
||||
regex_hidden_service = url_i2p
|
||||
elif type_hidden_service == 'regular':
|
||||
regex_hidden_service = url_i2p
|
||||
else:
|
||||
# verify crawler type (type_hidden_service)
|
||||
if type_hidden_service not in accepted_services:
|
||||
print('incorrect crawler type: {}'.format(type_hidden_service))
|
||||
exit(0)
|
||||
else:
|
||||
publisher.info("Script Crawler started")
|
||||
|
||||
# load domains blacklist
|
||||
load_type_blacklist(type_hidden_service)
|
||||
|
||||
splash_url = '{}:{}'.format( p.config.get("Crawler", "splash_url_onion"), splash_port)
|
||||
print('splash url: {}'.format(splash_url))
|
||||
|
||||
crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit")
|
||||
faup = Faup()
|
||||
|
||||
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
|
||||
|
||||
r_serv_metadata = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Metadata", "host"),
|
||||
port=p.config.getint("ARDB_Metadata", "port"),
|
||||
db=p.config.getint("ARDB_Metadata", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_cache = redis.StrictRedis(
|
||||
host=p.config.get("Redis_Cache", "host"),
|
||||
port=p.config.getint("Redis_Cache", "port"),
|
||||
db=p.config.getint("Redis_Cache", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_onion = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Onion", "host"),
|
||||
port=p.config.getint("ARDB_Onion", "port"),
|
||||
db=p.config.getint("ARDB_Onion", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
# Crawler status
|
||||
r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port)
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting')
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
|
||||
|
||||
# load domains blacklist
|
||||
load_type_blacklist(type_hidden_service)
|
||||
|
||||
while True:
|
||||
|
||||
# Priority Queue - Recovering the streamed message informations.
|
||||
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
|
||||
if mode == 'automatic':
|
||||
# Priority Queue - Recovering the streamed message informations.
|
||||
message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service))
|
||||
|
||||
if message is None:
|
||||
# Recovering the streamed message informations.
|
||||
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
|
||||
if message is None:
|
||||
# Recovering the streamed message informations.
|
||||
message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service))
|
||||
else:
|
||||
pass
|
||||
|
||||
if message is not None:
|
||||
|
||||
|
|
|
@ -75,6 +75,10 @@ def get_onion_status(domain, date):
|
|||
return False
|
||||
# ============= ROUTES ==============
|
||||
|
||||
@hiddenServices.route("/hiddenServices/2", methods=['GET'])
|
||||
def hiddenServices_page_test():
|
||||
return render_template("Crawler_index.html")
|
||||
|
||||
@hiddenServices.route("/hiddenServices/", methods=['GET'])
|
||||
def hiddenServices_page():
|
||||
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<title>AIL-Framework</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='font-awesome/css/font-awesome.css') }}" rel="stylesheet">
|
||||
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||
|
||||
<style>
|
||||
|
||||
</style>
|
||||
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
{% include 'nav_bar.html' %}
|
||||
|
||||
<div class="container-fluid">
|
||||
<div class="row">
|
||||
|
||||
|
||||
|
||||
|
||||
<nav class="col-md-2 d-none d-md-block bg-light sidebar border-right">
|
||||
<div class="sidebar-sticky">
|
||||
<ul class="nav flex-column">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link active text-dark" href="#">
|
||||
<i class="fa fa-search"></i>
|
||||
Dashboard <span class="sr-only">(current)</span>
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-secondary" href="#">
|
||||
<i class="fa fa-search"></i>
|
||||
Automatic Splash Crawler
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link text-secondary" href="#">
|
||||
<i class="fa fa-search"></i>
|
||||
Manual Splash Crawler
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
|
||||
|
||||
|
||||
<div >
|
||||
<pre>
|
||||
--------------
|
||||
|
||||
|
||||
|
||||
--------------
|
||||
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</body>
|
||||
|
||||
<script>
|
||||
$(document).ready(function(){
|
||||
$("#page-Crawler").addClass("active");
|
||||
});
|
||||
</script>
|
|
@ -0,0 +1,46 @@
|
|||
<nav class="navbar navbar-expand-xl navbar-dark bg-dark">
|
||||
<a class="navbar-brand" href="{{ url_for('dashboard.index') }}">
|
||||
<img src="{{ url_for('static', filename='image/ail-icon.png')}}" alt="AIL" style="width:80px;">
|
||||
</a>
|
||||
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
|
||||
|
||||
<div class="collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<ul class="navbar-nav">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link mr-3" href="{{ url_for('dashboard.index') }}">Home <span class="sr-only">(current)</span></a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="#" aria-disabled="true">Submit</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="#" aria-disabled="true">Browse Pastes</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="#" aria-disabled="true">Leaks Hunter</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" id="page-Crawler" href="#" tabindex="-1" href="#" aria-disabled="true">Crawlers</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="#" aria-disabled="true">Decoded</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="#" aria-disabled="true">Statistics</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link" href="#" aria-disabled="true">Options</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<form class="form-inline my-2 my-lg-0 ml-auto justify-content-center">
|
||||
<div class="form-group">
|
||||
<input class="form-control mr-sm-2" type="search" id="global_search" placeholder="Search" aria-label="Search" aria-describedby="advanced_search">
|
||||
<small id="advanced_search" class="form-text text-muted">Advanced Search</small>
|
||||
</div>
|
||||
<button class="btn btn-outline-info my-2 my-sm-0" type="submit"><i class="fa fa-search"></i></button>
|
||||
</form>
|
||||
</div>
|
||||
</nav>
|
|
@ -31,9 +31,9 @@ unzip temp/d3_${D3_JS_VERSION}.zip -d temp/
|
|||
unzip temp/moment_2.22.2.zip -d temp/
|
||||
unzip temp/daterangepicker_v0.18.0.zip -d temp/
|
||||
|
||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/
|
||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/
|
||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/
|
||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/js/bootstrap.min.js ./static/js/bootstrap4.min.js
|
||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css ./static/css/bootstrap4.min.css
|
||||
mv temp/bootstrap-${BOOTSTRAP_VERSION}-dist/css/bootstrap.min.css.map ./static/css/bootstrap4.min.css.map
|
||||
|
||||
mv temp/startbootstrap-sb-admin-${SBADMIN_VERSION} temp/sb-admin
|
||||
mv temp/startbootstrap-sb-admin-2-${SBADMIN_VERSION} temp/sb-admin-2
|
||||
|
|
Loading…
Reference in New Issue