mirror of https://github.com/CIRCL/AIL-framework
chg: [crawler] add cookies list by user/global, save cookies from file + dict(name, value), TODO: API + handle errors
parent
db634e8866
commit
1c45571042
|
@ -351,19 +351,19 @@ if __name__ == '__main__':
|
||||||
# get HAR files
|
# get HAR files
|
||||||
default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
|
default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
|
||||||
if default_crawler_har:
|
if default_crawler_har:
|
||||||
default_crawler_har = 1
|
default_crawler_har = True
|
||||||
else:
|
else:
|
||||||
default_crawler_har = 0
|
default_crawler_har = False
|
||||||
|
|
||||||
# get PNG files
|
# get PNG files
|
||||||
default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
|
default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
|
||||||
if default_crawler_png:
|
if default_crawler_png:
|
||||||
default_crawler_png = 1
|
default_crawler_png = True
|
||||||
else:
|
else:
|
||||||
default_crawler_png = 0
|
default_crawler_png = False
|
||||||
|
|
||||||
# Default crawler options
|
# Default crawler options
|
||||||
default_crawler_config = {'html': 1,
|
default_crawler_config = {'html': True,
|
||||||
'har': default_crawler_har,
|
'har': default_crawler_har,
|
||||||
'png': default_crawler_png,
|
'png': default_crawler_png,
|
||||||
'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
|
'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
|
||||||
|
|
|
@ -27,6 +27,12 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
||||||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
||||||
config_loader = None
|
config_loader = None
|
||||||
|
|
||||||
|
# # # # # # # #
|
||||||
|
# #
|
||||||
|
# COOKIES #
|
||||||
|
# #
|
||||||
|
# # # # # # # #
|
||||||
|
|
||||||
# # # #
|
# # # #
|
||||||
# Cookies Fields:
|
# Cookies Fields:
|
||||||
# - name
|
# - name
|
||||||
|
@ -69,17 +75,76 @@ def create_cookie_dict_from_browser(browser_cookie):
|
||||||
}
|
}
|
||||||
return dict_cookie
|
return dict_cookie
|
||||||
|
|
||||||
def load_cookies(cookies_uuid, domain=None, crawler_type='regular'):
    """Build the list of crawler cookies stored under ``cookies_uuid``.

    Both cookie sources returned by ``get_cookies`` are converted with
    ``create_cookie_dict``:

    - cookies imported from a JSON file are passed as ``browser_cookie``;
    - manually entered ``(name, value)`` pairs are passed as
      ``cookie_name`` / ``cookie_value`` together with ``domain``.

    :param cookies_uuid: identifier of the stored cookie collection
    :param domain: domain forwarded to ``create_cookie_dict`` for manual cookies
    :param crawler_type: crawler type forwarded to ``create_cookie_dict``
    :return: list of the values returned by ``create_cookie_dict``
             (JSON cookies first, then manual cookies)
    """
    json_cookies, manual_cookies = get_cookies(cookies_uuid)

    loaded = [
        create_cookie_dict(browser_cookie=entry, crawler_type=crawler_type)
        for entry in json_cookies
    ]
    loaded.extend(
        create_cookie_dict(cookie_name=name, cookie_value=value, domain=domain, crawler_type=crawler_type)
        for name, value in manual_cookies
    )
    return loaded
|
||||||
|
|
||||||
def get_all_cookies():
    """Return every cookie collection uuid registered in the ``cookies:all`` set.

    The result of the Redis ``SMEMBERS`` call is returned to the caller
    (it was previously computed and discarded, so the function always
    returned ``None``).
    """
    return r_serv_onion.smembers('cookies:all')
|
|
||||||
|
def get_all_global_cookies():
    """Return the members of the ``cookies:global`` Redis set.

    ``save_cookies`` adds a collection uuid to this set whenever its
    ``level`` is not 0. The ``SMEMBERS`` result is now actually returned
    instead of being dropped.
    """
    return r_serv_onion.smembers('cookies:global')
|
||||||
|
|
||||||
|
def get_user_cookies(user_id):
    """Return the members of the ``cookies:user:<user_id>`` Redis set.

    ``save_cookies`` adds a collection uuid to this set when its ``level``
    is 0. The ``SMEMBERS`` result is now actually returned instead of
    being dropped.

    :param user_id: identifier used to build the per-user set key
    """
    return r_serv_onion.smembers('cookies:user:{}'.format(user_id))
|
||||||
|
|
||||||
|
def exist_cookies_uuid(cookies_uuid):
    """Tell whether the Redis key ``cookie_metadata:<cookies_uuid>`` exists.

    :param cookies_uuid: cookie collection identifier to look up
    :return: the raw result of the Redis ``EXISTS`` command
    """
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    return r_serv_onion.exists(metadata_key)
|
||||||
|
|
||||||
|
def get_manual_cookies_keys(cookies_uuid):
    """Fetch the manual-cookies hash of a collection.

    Despite its name this issues ``HGETALL`` on
    ``cookies:manual_cookies:<cookies_uuid>``, so the whole hash is
    returned, not only its field names.

    :param cookies_uuid: cookie collection identifier
    """
    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hgetall(manual_key)
|
||||||
|
|
||||||
|
def get_manual_cookie_val(cookies_uuid, cookie_name):
    """Read one manual cookie value.

    :param cookies_uuid: cookie collection identifier
    :param cookie_name: field to read in ``cookies:manual_cookies:<cookies_uuid>``
    :return: the result of the Redis ``HGET`` command for that field
    """
    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    return r_serv_onion.hget(manual_key, cookie_name)
|
||||||
|
|
||||||
|
def get_cookies(cookies_uuid):
    """Load both cookie sources stored for ``cookies_uuid``.

    :param cookies_uuid: cookie collection identifier
    :return: tuple ``(cookies_json, l_cookies)`` where

        - ``cookies_json`` is the decoded content of
          ``cookies:json_cookies:<cookies_uuid>`` (``[]`` when the key is
          missing or empty);
        - ``l_cookies`` is a list of ``(cookie_name, cookie_value)`` tuples
          taken from the manual-cookies hash.
    """
    cookies_json = r_serv_onion.get('cookies:json_cookies:{}'.format(cookies_uuid))
    cookies_json = json.loads(cookies_json) if cookies_json else []

    # get_manual_cookies_keys() already performs an HGETALL, so the values are
    # available in its result: reuse them rather than issuing one extra HGET
    # per cookie name.
    l_cookies = list(get_manual_cookies_keys(cookies_uuid).items())
    return (cookies_json, l_cookies)
|
||||||
|
|
||||||
|
# # TODO: handle errors + add api handler
def save_cookies(user_id, json_cookies=None, l_cookies=None, cookies_uuid=None, level=1, description=None):
    """Store a cookie collection in Redis and return its uuid.

    :param user_id: owner of the collection (stored in the metadata and used
                    for the per-user set when ``level`` is 0)
    :param json_cookies: JSON document (string) of cookies to import; it is
                         decoded and re-encoded before being stored
    :param l_cookies: iterable of dicts with ``'name'`` and ``'value'`` keys
                      (manually entered cookies); defaults to no cookie
    :param cookies_uuid: uuid of an existing collection to update; a new
                         uuid4 is generated when it is ``None`` or unknown
    :param level: 0 registers the collection in ``cookies:user:<user_id>``,
                  any other value registers it in ``cookies:global``
    :param description: free text stored in the metadata
    :return: the uuid under which the cookies were saved
    :raises ValueError: when ``json_cookies`` is not valid JSON
                        (``json.loads`` error, not caught yet)
    """
    # Avoid sharing one mutable default list between calls.
    if l_cookies is None:
        l_cookies = []

    if cookies_uuid is None or not exist_cookies_uuid(cookies_uuid):
        cookies_uuid = str(uuid.uuid4())

    if json_cookies:
        json_cookies = json.loads(json_cookies) # # TODO: catch Exception
        r_serv_onion.set('cookies:json_cookies:{}'.format(cookies_uuid), json.dumps(json_cookies))

    manual_key = 'cookies:manual_cookies:{}'.format(cookies_uuid)
    for cookie_dict in l_cookies:
        r_serv_onion.hset(manual_key, cookie_dict['name'], cookie_dict['value'])

    # cookies level # # TODO: edit level set on edit
    r_serv_onion.sadd('cookies:all', cookies_uuid)
    if level == 0:
        r_serv_onion.sadd('cookies:user:{}'.format(user_id), cookies_uuid)
    else:
        r_serv_onion.sadd('cookies:global', cookies_uuid)

    # metadata
    # The key must be built from cookies_uuid: formatting the builtin ``id``
    # wrote every collection's metadata to the same bogus key, so
    # exist_cookies_uuid() could never find a saved collection.
    metadata_key = 'cookie_metadata:{}'.format(cookies_uuid)
    r_serv_onion.hset(metadata_key, 'user_id', user_id)
    r_serv_onion.hset(metadata_key, 'level', level)
    r_serv_onion.hset(metadata_key, 'description', description)
    r_serv_onion.hset(metadata_key, 'date', datetime.date.today().strftime("%Y%m%d"))
    return cookies_uuid
|
||||||
|
|
||||||
|
#### ####
|
||||||
|
|
||||||
|
def is_redirection(domain, last_url):
    """Tell whether ``last_url`` points to another domain than ``domain``.

    Only the last two dot-separated labels of the URL's network location are
    compared, so ``www.example.onion`` is considered the same domain as
    ``example.onion``.

    :param domain: expected domain, compared as-is
    :param last_url: URL reached at the end of the crawl
    :return: ``True`` when the reduced network location differs from ``domain``
    """
    labels = urlparse(last_url).netloc.split('.')
    # Slicing keeps single-label hosts (e.g. 'localhost') and empty netlocs
    # working; indexing [-2] raised IndexError for them.
    last_domain = '.'.join(labels[-2:])
    return domain != last_domain
|
||||||
|
|
||||||
# domain up
|
# domain up
|
||||||
def create_domain_metadata(domain_type, domain, current_port, date, date_month):
|
def create_domain_metadata(domain_type, domain, current_port, date, date_month):
|
||||||
|
|
|
@ -121,8 +121,8 @@ class TorSplashCrawler():
|
||||||
self.date_month = date['date_month']
|
self.date_month = date['date_month']
|
||||||
self.date_epoch = int(date['epoch'])
|
self.date_epoch = int(date['epoch'])
|
||||||
|
|
||||||
self.png = True
|
self.png = crawler_options['png']
|
||||||
self.har = True
|
self.har = crawler_options['har']
|
||||||
self.cookies = cookies
|
self.cookies = cookies
|
||||||
|
|
||||||
config_section = 'Crawler'
|
config_section = 'Crawler'
|
||||||
|
@ -176,6 +176,8 @@ class TorSplashCrawler():
|
||||||
# detect connection to proxy refused
|
# detect connection to proxy refused
|
||||||
error_log = (json.loads(response.body.decode()))
|
error_log = (json.loads(response.body.decode()))
|
||||||
print(error_log)
|
print(error_log)
|
||||||
|
elif crawler_splash.is_redirection(self.domains[0], response.data['last_url']):
|
||||||
|
pass # ignore response
|
||||||
else:
|
else:
|
||||||
|
|
||||||
item_id = crawler_splash.create_item_id(self.item_dir, self.domains[0])
|
item_id = crawler_splash.create_item_id(self.item_dir, self.domains[0])
|
||||||
|
|
|
@ -37,8 +37,7 @@ if __name__ == '__main__':
|
||||||
crawler_options = crawler_json['crawler_options']
|
crawler_options = crawler_json['crawler_options']
|
||||||
date = crawler_json['date']
|
date = crawler_json['date']
|
||||||
requested_mode = crawler_json['requested']
|
requested_mode = crawler_json['requested']
|
||||||
cookies = crawler_splash.load_cookies(crawler_splash.get_cookies(), domain, crawler_type='onion')
|
cookies = crawler_splash.load_cookies('ccad0090-bdcb-4ba5-875b-3dae8f936216', domain, crawler_type=service_type)
|
||||||
print(cookies)
|
|
||||||
|
|
||||||
redis_cache.delete('crawler_request:{}'.format(uuid))
|
redis_cache.delete('crawler_request:{}'.format(uuid))
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,7 @@ import Tag
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||||
import Domain
|
import Domain
|
||||||
|
import crawler_splash
|
||||||
|
|
||||||
r_cache = Flask_config.r_cache
|
r_cache = Flask_config.r_cache
|
||||||
r_serv_db = Flask_config.r_serv_db
|
r_serv_db = Flask_config.r_serv_db
|
||||||
|
@ -156,3 +157,55 @@ def domains_explorer_web():
|
||||||
|
|
||||||
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
|
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
|
||||||
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
|
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
|
||||||
|
|
||||||
|
@crawler_splash.route('/crawler/cookies/add', methods=['GET'])
#@login_required
#@login_analyst
def crawler_cookies_add():
    """Serve the "add cookies" form (``add_cookies.html``) without any context."""
    # NOTE(review): the login decorators above are commented out - confirm
    # this page is meant to be reachable without authentication.
    template_name = "add_cookies.html"
    return render_template(template_name)
|
||||||
|
|
||||||
|
@crawler_splash.route('/crawler/cookies/add_post', methods=['POST'])
#@login_required
#@login_analyst
def crawler_cookies_add_post():
    """Handle the "add cookies" form and store the submitted cookies.

    Form fields read:

    - ``description``: free text forwarded as-is;
    - ``level``: any non-empty value selects level 1, otherwise level 0;
    - ``file``: optional upload, decoded and forwarded as the JSON cookies
      (``'[]'`` when no ``file`` part is present);
    - every field name carrying exactly two values is treated as one manual
      cookie: first value is the name, second the value.

    The form template is rendered again once the cookies are saved.
    """
    user_id = current_user.get_id()

    description = request.form.get('description')
    level = 1 if request.form.get('level') else 0

    if 'file' in request.files:
        json_file = request.files['file'].read().decode()
    else:
        json_file = '[]'

    # Get cookies to add
    l_manual_cookie = []
    # Values submitted without a name; collected but not used yet.
    l_invalid_cookie = []
    for field_name in request.form:
        values = request.form.getlist(field_name)
        if len(values) != 2:
            continue
        cookie_name, cookie_value = values
        if cookie_name:
            l_manual_cookie.append({'name': cookie_name, 'value': cookie_value})
        elif cookie_value:
            l_invalid_cookie.append({'name': '', 'value': cookie_value})

    # NOTE(review): ``crawler_splash`` is used here as the cookie library while
    # the decorator above uses the same name as the Flask blueprint - confirm
    # the two do not shadow each other in this module.
    crawler_splash.save_cookies(user_id, json_cookies=json_file, l_cookies=l_manual_cookie, level=level, description=description)
    return render_template("add_cookies.html")
|
||||||
|
|
||||||
|
@crawler_splash.route('/crawler/cookies/all', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookies_all():
    """Render the cookies of the current user together with the global ones.

    The template receives ``user_cookies`` (result of ``get_user_cookies``
    for the current user) and ``global_cookies`` (result of
    ``get_all_global_cookies``).
    """
    # get_id() takes no argument; passing the not-yet-assigned ``user_id``
    # raised UnboundLocalError on every request.
    user_id = current_user.get_id()
    user_cookies = crawler_splash.get_user_cookies(user_id)
    global_cookies = crawler_splash.get_all_global_cookies()
    # NOTE(review): this renders the "add cookies" template - confirm whether a
    # dedicated listing template is intended.
    return render_template("add_cookies.html", user_cookies=user_cookies, global_cookies=global_cookies)
|
||||||
|
|
|
@ -0,0 +1,156 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>AIL-Framework</title>
|
||||||
|
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
||||||
|
<!-- Core CSS -->
|
||||||
|
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||||
|
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
|
||||||
|
|
||||||
|
<!-- JS -->
|
||||||
|
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||||
|
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||||
|
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
|
||||||
|
{% include 'nav_bar.html' %}
|
||||||
|
|
||||||
|
<div class="container-fluid">
|
||||||
|
<div class="row">
|
||||||
|
|
||||||
|
{% include 'crawler/menu_sidebar.html' %}
|
||||||
|
|
||||||
|
<div class="col-12 col-lg-10" id="core_content">
|
||||||
|
|
||||||
|
<div class="card mb-3 mt-1">
|
||||||
|
<div class="card-header text-white bg-dark">
|
||||||
|
<h5 class="card-title"><i class="fas fa-cookie"></i> Add Cookies</h5>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
|
||||||
|
<form action="{{ url_for('crawler_splash.crawler_cookies_add_post') }}" method="post" enctype="multipart/form-data">
|
||||||
|
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-12 col-md-9">
|
||||||
|
<div class="input-group mb-2 mr-sm-2">
|
||||||
|
<div class="input-group-prepend">
|
||||||
|
<div class="input-group-text"><i class="fas fa-tag"></i></div>
|
||||||
|
</div>
|
||||||
|
<input id="description" name="description" class="form-control" placeholder="cookies description - (optional)" type="text">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-12 col-md-3">
|
||||||
|
<div class="custom-control custom-switch mt-1">
|
||||||
|
<input class="custom-control-input" type="checkbox" name="level" id="id_level" checked="">
|
||||||
|
<label class="custom-control-label" for="id_level">
|
||||||
|
<i class="fas fa-users"></i> Show cookies to all Users
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
<div class="form-group">
|
||||||
|
<label for="file"><b>JSON File</b> Cookies to import:</label>
|
||||||
|
<input type="file" class="form-control-file btn btn-outline-secondary" id="file" name="file">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
|
||||||
|
<h5>Add manual cookies:</h5>
|
||||||
|
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-5" for="obj_input_cookie_name"><b>Cookie Name</b></div>
|
||||||
|
<div class="col-6" for="obj_input_cookie_value"><b>Cookie Value</b></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="form-horizontal">
|
||||||
|
<div class="form-body">
|
||||||
|
<div class="form-group">
|
||||||
|
<div class="fields">
|
||||||
|
<div class="input-group mb-1">
|
||||||
|
<input type="text" class="form-control col-5" name="first_cookie" id="obj_input_cookie_name">
|
||||||
|
<input type="text" class="form-control col-6" name="first_cookie" id="obj_input_cookie_value">
|
||||||
|
<span class="btn btn-info input-group-addon add-field col-1"><i class="fas fa-plus"></i></span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<span class="help-block" hidden>Manual Cookies</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="form-group">
|
||||||
|
<button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie-bite"></i> Add Cookies</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</form>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
var chart = {};

// Once the DOM is ready, highlight the crawler page and the "Add Cookies"
// entry in the navigation, and un-mute the "Cookies" section title.
$(function () {
  $("#page-crawler").addClass("active");
  $("#nav_cookies_add").addClass("active");
  $("#nav_title_cookies").removeClass("text-muted");
});
|
||||||
|
|
||||||
|
// Delete button placed at the end of every dynamically added cookie row.
var minusButton = '<span class="btn btn-danger input-group-addon delete-field col-1"><i class="fas fa-trash-alt"></i></span>';

// Build one well-formed "cookie name / cookie value" row.
// Both inputs deliberately share the same `name`, so the row is submitted as
// a two-value field (name first, value second).
function build_cookie_row(field_name) {
  return '<div class="input-group mb-1">'
    + '<input type="text" class="form-control col-5" name="' + field_name + '">'
    + '<input type="text" class="form-control col-6" name="' + field_name + '">'
    + minusButton
    + '</div>';
}

// Add a new empty cookie row, named with a fresh uuid, above the help block.
// The row is built as balanced markup instead of relying on the HTML parser
// to close an open <div> and drop a stray </div>.
$('.add-field').click(function() {
  $(build_cookie_row(uuidv4())).insertBefore('.help-block');
});
|
||||||
|
|
||||||
|
// Remove the row (parent element) of the clicked delete button.
function remove_cookie_row() {
  $(this).parent().remove();
}

// Delegated on `.fields` so rows added after page load are handled too.
$('.fields').on('click', '.delete-field', remove_cookie_row);
|
||||||
|
|
||||||
|
// Show or hide the side navigation menu and adjust the layout classes of the
// side column and of the main content accordingly.
function toggle_sidebar(){
  var nav_menu = $('#nav_menu');
  var side_menu = $('#side_menu');
  var core_content = $('#core_content');

  if (nav_menu.is(':visible')) {
    // Collapse: hide the menu and drop the two-column layout classes.
    nav_menu.hide();
    side_menu.removeClass('border-right col-lg-2');
    core_content.removeClass('col-lg-10');
    return;
  }

  // Expand: show the menu and restore the layout classes.
  nav_menu.show();
  side_menu.addClass('border-right col-lg-2');
  core_content.addClass('col-lg-10');
}
|
||||||
|
|
||||||
|
// Generate a random version-4 style UUID string using crypto.getRandomValues.
function uuidv4() {
  // Array-to-string coercion yields "10000000-1000-4000-8000-100000000000".
  var skeleton = [1e7] + -1e3 + -4e3 + -8e3 + -1e11;

  // Every 0, 1 and 8 of the skeleton is replaced by a random hex digit; the
  // 4 is left untouched.
  return skeleton.replace(/[018]/g, function (c) {
    var random_byte = crypto.getRandomValues(new Uint8Array(1))[0];
    // For '0' and '1' the mask is 15 (any hex digit); for '8' it is 3, so
    // the resulting digit stays within 8..b.
    var mask = 15 >> (c / 4);
    return (c ^ (random_byte & mask)).toString(16);
  });
}
|
||||||
|
|
||||||
|
</script>
|
|
@ -47,9 +47,6 @@
|
||||||
|
|
||||||
<h5 class="d-flex text-muted w-100" id="nav_title_domains_explorer">
|
<h5 class="d-flex text-muted w-100" id="nav_title_domains_explorer">
|
||||||
<span>Domain Explorer </span>
|
<span>Domain Explorer </span>
|
||||||
<a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
|
|
||||||
<i class="fas fa-plus-circle ml-auto"></i>
|
|
||||||
</a>
|
|
||||||
</h5>
|
</h5>
|
||||||
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
|
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
|
@ -64,5 +61,27 @@
|
||||||
<span>Web Domain</span>
|
<span>Web Domain</span>
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<h5 class="d-flex text-muted w-100" id="nav_title_cookies">
|
||||||
|
<span>Cookies </span>
|
||||||
|
<a class="ml-auto" href="{{url_for('crawler_splash.crawler_cookies_add')}}">
|
||||||
|
<i class="fas fa-plus-circle ml-auto"></i>
|
||||||
|
</a>
|
||||||
|
</h5>
|
||||||
|
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="{{url_for('crawler_splash.crawler_cookies_add')}}" id="nav_cookies_add">
|
||||||
|
<i class="fas fa-cookie"></i>
|
||||||
|
<span>Add Cookies</span>
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link" href="{{url_for('crawler_splash.crawler_cookies_all')}}" id="nav_cookies_all">
|
||||||
|
<i class="fas fa-cookie-bite"></i>
|
||||||
|
<span>All Cookies</span>
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
</nav>
|
</nav>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -165,7 +165,6 @@ $('.add-field').click(function() {
|
||||||
});
|
});
|
||||||
|
|
||||||
// Remove the row (parent element) of the clicked delete button.
function remove_field_row() {
  $(this).parent().remove();
  //$.get( "#")
}

// Delegated on `.fields` so rows added after page load are handled too.
$('.fields').on('click', '.delete-field', remove_field_row);
|
||||||
|
|
Loading…
Reference in New Issue