Merge pull request #486 from CIRCL/crawler_v2

Crawler v2 - Add cookiejar - use cookie to bypass login form
Thirion Aurélien 2020-04-01 10:44:50 +02:00 committed by GitHub
commit d72f28fd53
26 changed files with 1700 additions and 310 deletions


@@ -351,23 +351,24 @@ if __name__ == '__main__':
    # get HAR files
    default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
    if default_crawler_har:
-       default_crawler_har = 1
+       default_crawler_har = True
    else:
-       default_crawler_har = 0
+       default_crawler_har = False
    # get PNG files
    default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
    if default_crawler_png:
-       default_crawler_png = 1
+       default_crawler_png = True
    else:
-       default_crawler_png = 0
+       default_crawler_png = False
    # Default crawler options
-   default_crawler_config = {'html': 1,
+   default_crawler_config = {'html': True,
                              'har': default_crawler_har,
                              'png': default_crawler_png,
                              'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
                              'closespider_pagecount': p.config.getint("Crawler", "default_crawler_closespider_pagecount"),
+                             'cookiejar_uuid': None,
                              'user_agent': p.config.get("Crawler", "default_crawler_user_agent")}
    # Track launched crawler


@@ -1,10 +1,12 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
+import base64
import os
import sys
import redis
+from hashlib import sha256
from io import BytesIO
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
@@ -164,6 +166,25 @@ def get_screenshot_file_content(sha256_string):
        file_content = BytesIO(f.read())
    return file_content

+# if force save, ignore max_size
+def save_crawled_screeshot(b64_screenshot, max_size, f_save=False):
+    screenshot_size = (len(b64_screenshot)*3) /4
+    if screenshot_size < max_size or f_save:
+        image_content = base64.standard_b64decode(b64_screenshot.encode())
+        sha256_string = sha256(image_content).hexdigest()
+        filepath = get_screenshot_filepath(sha256_string)
+        if os.path.isfile(filepath):
+            #print('File already exist')
+            return sha256_string
+        # create dir
+        dirname = os.path.dirname(filepath)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        with open(filepath, 'wb') as f:
+            f.write(image_content)
+        return sha256_string
+    return False

def save_screenshot_file(sha256_string, io_content):
    filepath = get_screenshot_filepath(sha256_string)
    if os.path.isfile(filepath):

bin/lib/crawlers.py (new file, executable)

@@ -0,0 +1,532 @@
#!/usr/bin/python3
"""
API Helper
===================
"""
import base64
import gzip
import json
import os
import re
import redis
import sys
import uuid
from datetime import datetime, timedelta
from urllib.parse import urlparse
from pyfaup.faup import Faup
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None
faup = Faup()
def generate_uuid():
return str(uuid.uuid4()).replace('-', '')
################################################################################
# # TODO: handle prefix cookies
# # TODO: fill empty fields
def create_cookie_crawler(cookie_dict, domain, crawler_type='regular'):
# check cookie domain field
if not 'domain' in cookie_dict:
cookie_dict['domain'] = '.{}'.format(domain)
# tor browser: disable secure cookie
if crawler_type=='onion':
cookie_dict['secure'] = False
# force cookie domain
# url = urlparse(browser_cookie['Host raw'])
# domain = url.netloc.split(':', 1)[0]
# cookie_dict['domain'] = '.{}'.format(domain)
# change expire date
cookie_dict['expires'] = (datetime.now() + timedelta(days=10)).strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
return cookie_dict
def load_crawler_cookies(cookiejar_uuid, domain, crawler_type='regular'):
cookies = get_cookiejar_cookies_list(cookiejar_uuid)
all_cookies = []
for cookie_dict in cookies:
all_cookies.append(create_cookie_crawler(cookie_dict, domain, crawler_type=crawler_type))
return all_cookies
################################################################################
def get_all_cookiejar():
    return r_serv_onion.smembers('cookiejar:all')
def get_global_cookiejar():
res = r_serv_onion.smembers('cookiejar:global')
if not res:
res = []
return res
def get_user_cookiejar(user_id):
res = r_serv_onion.smembers('cookiejar:user:{}'.format(user_id))
if not res:
res = []
return res
def exist_cookiejar(cookiejar_uuid):
return r_serv_onion.exists('cookiejar_metadata:{}'.format(cookiejar_uuid))
def create_cookiejar(user_id, level=1, description=None):
cookiejar_uuid = str(uuid.uuid4())
r_serv_onion.sadd('cookiejar:all', cookiejar_uuid)
if level==0:
r_serv_onion.sadd('cookiejar:user:{}'.format(user_id), cookiejar_uuid)
else:
r_serv_onion.sadd('cookiejar:global', cookiejar_uuid)
# metadata
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'user_id', user_id)
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'level', level)
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description', description)
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'date', datetime.now().strftime("%Y%m%d"))
# if json_cookies:
# json_cookies = json.loads(json_cookies) # # TODO: catch Exception
# r_serv_onion.set('cookies:json_cookies:{}'.format(cookies_uuid), json.dumps(json_cookies))
#
# for cookie_dict in l_cookies:
# r_serv_onion.hset('cookies:manual_cookies:{}'.format(cookies_uuid), cookie_dict['name'], cookie_dict['value'])
return cookiejar_uuid
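# ARDB_Onion key layout used by the cookiejar helpers in this file (summary derived
# from the calls above and below):
#   cookiejar:all                            -> set of every cookiejar_uuid
#   cookiejar:global                         -> set of cookiejar_uuid visible to all users (level 1)
#   cookiejar:user:<user_id>                 -> set of cookiejar_uuid owned by a single user (level 0)
#   cookiejar_metadata:<cookiejar_uuid>      -> hash: user_id, level, description, date
#   cookiejar:<cookiejar_uuid>:cookies:uuid  -> set of cookie_uuid contained in the jar
#   cookiejar:cookie:<cookie_uuid>           -> hash: name, value, domain, path, httpOnly, secure, text
#   cookies:map:cookiejar:<cookie_uuid>      -> set of cookiejar_uuid referencing the cookie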
def delete_cookie_jar(cookiejar_uuid):
level = get_cookiejar_level(cookiejar_uuid)
if level == 0:
user_id = get_cookiejar_owner(cookiejar_uuid)
r_serv_onion.srem('cookiejar:user:{}'.format(user_id), cookiejar_uuid)
else:
r_serv_onion.srem('cookiejar:global', cookiejar_uuid)
r_serv_onion.delete('cookiejar_metadata:{}'.format(cookiejar_uuid))
def get_cookiejar_cookies_uuid(cookiejar_uuid):
res = r_serv_onion.smembers('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid))
if not res:
res = []
return res
def get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=False):
l_cookiejar = []
for cookie_uuid in get_cookiejar_cookies_uuid(cookiejar_uuid):
if add_cookie_uuid:
l_cookiejar.append((get_cookie_dict(cookie_uuid), cookie_uuid))
else:
l_cookiejar.append(get_cookie_dict(cookie_uuid))
return l_cookiejar
## Cookiejar metadata ##
def get_cookiejar_description(cookiejar_uuid):
return r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description')
def get_cookiejar_date(cookiejar_uuid):
return r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'date')
def get_cookiejar_owner(cookiejar_uuid):
return r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'user_id')
def get_cookiejar_level(cookiejar_uuid):
res = r_serv_onion.hget('cookiejar_metadata:{}'.format(cookiejar_uuid), 'level')
if not res:
res = 1
return int(res)
def get_cookiejar_metadata(cookiejar_uuid, level=False):
dict_cookiejar = {}
if exist_cookiejar(cookiejar_uuid):
dict_cookiejar['cookiejar_uuid'] = cookiejar_uuid
dict_cookiejar['description'] = get_cookiejar_description(cookiejar_uuid)
dict_cookiejar['date'] = get_cookiejar_date(cookiejar_uuid)
dict_cookiejar['user_id'] = get_cookiejar_owner(cookiejar_uuid)
if level:
dict_cookiejar['level'] = get_cookiejar_level(cookiejar_uuid)
return dict_cookiejar
def get_cookiejar_metadata_by_iterator(iter_cookiejar_uuid):
l_cookiejar_metadata = []
for cookiejar_uuid in iter_cookiejar_uuid:
l_cookiejar_metadata.append(get_cookiejar_metadata(cookiejar_uuid))
return l_cookiejar_metadata
def edit_cookiejar_description(cookiejar_uuid, description):
r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description', description)
# # # # # # # #
# #
# COOKIES #
# #
# # # # # # # #
# # # #
# Cookies Fields:
# - name
# - value
# - path (optional)
# - domain (optional)
# - secure (optional)
# - httpOnly (optional)
# - text (optional)
# # # #
def get_cookie_all_keys_name():
return ['name', 'value', 'domain', 'path', 'httpOnly', 'secure']
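# Illustrative example (hypothetical values): a minimal cookie dict as accepted by
# add_cookie_to_cookiejar() below; only 'name' and 'value' are mandatory, the other
# keys listed above are optional:
#   {'name': 'session_id', 'value': 'abc123',
#    'domain': '.example.onion', 'path': '/', 'httpOnly': True, 'secure': False}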
def exists_cookie(cookie_uuid):
if int(r_serv_onion.scard('cookies:map:cookiejar:{}'.format(cookie_uuid))) > 0:
return True
return False
def get_cookie_value(cookie_uuid, name):
return r_serv_onion.hget('cookiejar:cookie:{}'.format(cookie_uuid), name)
def set_cookie_value(cookie_uuid, name, value):
r_serv_onion.hset('cookiejar:cookie:{}'.format(cookie_uuid), name, value)
def delete_cookie_value(cookie_uuid, name):
r_serv_onion.hdel('cookiejar:cookie:{}'.format(cookie_uuid), name)
def get_cookie_dict(cookie_uuid):
cookie_dict = {}
for key_name in r_serv_onion.hkeys('cookiejar:cookie:{}'.format(cookie_uuid)):
cookie_dict[key_name] = get_cookie_value(cookie_uuid, key_name)
return cookie_dict
# name, value, path=None, httpOnly=None, secure=None, domain=None, text=None
def add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict):
cookie_uuid = generate_uuid()
r_serv_onion.sadd('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid), cookie_uuid)
r_serv_onion.sadd('cookies:map:cookiejar:{}'.format(cookie_uuid), cookiejar_uuid)
set_cookie_value(cookie_uuid, 'name', cookie_dict['name'])
set_cookie_value(cookie_uuid, 'value', cookie_dict['value'])
if 'path' in cookie_dict:
set_cookie_value(cookie_uuid, 'path', cookie_dict['path'])
if 'httpOnly' in cookie_dict:
set_cookie_value(cookie_uuid, 'httpOnly', cookie_dict['httpOnly'])
if 'secure' in cookie_dict:
set_cookie_value(cookie_uuid, 'secure', cookie_dict['secure'])
if 'domain' in cookie_dict:
set_cookie_value(cookie_uuid, 'domain', cookie_dict['domain'])
if 'text' in cookie_dict:
set_cookie_value(cookie_uuid, 'text', cookie_dict['text'])
return cookie_uuid
def add_cookies_to_cookiejar(cookiejar_uuid, l_cookies):
for cookie_dict in l_cookies:
add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict)
def delete_all_cookies_from_cookiejar(cookiejar_uuid):
for cookie_uuid in get_cookiejar_cookies_uuid(cookiejar_uuid):
delete_cookie_from_cookiejar(cookiejar_uuid, cookie_uuid)
def delete_cookie_from_cookiejar(cookiejar_uuid, cookie_uuid):
r_serv_onion.srem('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid), cookie_uuid)
r_serv_onion.srem('cookies:map:cookiejar:{}'.format(cookie_uuid), cookiejar_uuid)
if not exists_cookie(cookie_uuid):
r_serv_onion.delete('cookiejar:cookie:{}'.format(cookie_uuid))
def edit_cookie(cookiejar_uuid, cookie_uuid, cookie_dict):
# delete old keys
for key_name in r_serv_onion.hkeys('cookiejar:cookie:{}'.format(cookie_uuid)):
if key_name not in cookie_dict:
delete_cookie_value(cookie_uuid, key_name)
# add new keys
cookie_all_keys_name = get_cookie_all_keys_name()
for key_name in cookie_dict:
if key_name in cookie_all_keys_name:
set_cookie_value(cookie_uuid, key_name, cookie_dict[key_name])
## - - ##
## Cookies import ## # TODO: add browser type ?
def import_cookies_from_json(json_cookies, cookiejar_uuid):
for cookie in json_cookies:
try:
cookie_dict = unpack_imported_json_cookie(cookie)
add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict)
except KeyError:
return {'error': 'Invalid cookie key, please submit a valid JSON', 'cookiejar_uuid': cookiejar_uuid}
# # TODO: add text field
def unpack_imported_json_cookie(json_cookie):
cookie_dict = {'name': json_cookie['Name raw'], 'value': json_cookie['Content raw']}
if 'Path raw' in json_cookie:
cookie_dict['path'] = json_cookie['Path raw']
if 'HTTP only raw' in json_cookie:
cookie_dict['httpOnly'] = json_cookie['HTTP only raw'] == 'true'
if 'Send for' in json_cookie:
cookie_dict['secure'] = json_cookie['Send for'] == 'Encrypted connections only'
if 'Host raw' in json_cookie:
url = urlparse(json_cookie['Host raw'])
cookie_dict['domain'] = url.netloc.split(':', 1)[0]
return cookie_dict
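# Illustrative example (hypothetical values) of the '* raw' export format handled above:
#   {'Name raw': 'session_id', 'Content raw': 'abc123', 'Path raw': '/',
#    'HTTP only raw': 'true', 'Send for': 'Encrypted connections only',
#    'Host raw': 'https://example.onion:443'}
# unpacks to:
#   {'name': 'session_id', 'value': 'abc123', 'path': '/',
#    'httpOnly': True, 'secure': True, 'domain': 'example.onion'}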
def misp_cookie_import(misp_object, cookiejar_uuid):
pass
## - - ##
#### COOKIEJAR API ####
def api_import_cookies_from_json(json_cookies_str, cookiejar_uuid): # # TODO: add catch
json_cookies = json.loads(json_cookies_str)
res = import_cookies_from_json(json_cookies, cookiejar_uuid)
if res:
return (res, 400)
#### ####
#### COOKIES API ####
def api_verify_basic_cookiejar(cookiejar_uuid, user_id):
if not exist_cookiejar(cookiejar_uuid):
return ({'error': 'unknow cookiejar uuid', 'cookiejar_uuid': cookiejar_uuid}, 404)
level = get_cookiejar_level(cookiejar_uuid)
if level == 0: # # TODO: check if user is admin
cookie_owner = get_cookiejar_owner(cookiejar_uuid)
if cookie_owner != user_id:
return ({'error': 'The access to this cookiejar is restricted'}, 403)
def api_get_cookiejar_cookies(cookiejar_uuid, user_id):
res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
if res:
return res
res = get_cookiejar_cookies_list(cookiejar_uuid)
return (res, 200)
def api_edit_cookiejar_description(user_id, cookiejar_uuid, description):
res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
if res:
return res
edit_cookiejar_description(cookiejar_uuid, description)
return ({'cookiejar_uuid': cookiejar_uuid}, 200)
def api_get_cookiejar_cookies_with_uuid(cookiejar_uuid, user_id):
res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
if res:
return res
res = get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True)
return (res, 200)
def api_get_cookies_list_select(user_id):
l_cookiejar = []
for cookies_uuid in get_global_cookiejar():
l_cookiejar.append('{} : {}'.format(get_cookiejar_description(cookies_uuid), cookies_uuid))
for cookies_uuid in get_user_cookiejar(user_id):
l_cookiejar.append('{} : {}'.format(get_cookiejar_description(cookies_uuid), cookies_uuid))
return sorted(l_cookiejar)
def api_delete_cookie_from_cookiejar(user_id, cookiejar_uuid, cookie_uuid):
res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
if res:
return res
delete_cookie_from_cookiejar(cookiejar_uuid, cookie_uuid)
return ({'cookiejar_uuid': cookiejar_uuid, 'cookie_uuid': cookie_uuid}, 200)
def api_delete_cookie_jar(user_id, cookiejar_uuid):
res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
if res:
return res
delete_cookie_jar(cookiejar_uuid)
return ({'cookiejar_uuid': cookiejar_uuid}, 200)
def api_edit_cookie(user_id, cookiejar_uuid, cookie_uuid, cookie_dict):
res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
if res:
return res
if 'name' not in cookie_dict or 'value' not in cookie_dict or cookie_dict['name'] == '':
    return ({'error': 'cookie name or value not provided'}, 400)
edit_cookie(cookiejar_uuid, cookie_uuid, cookie_dict)
return (get_cookie_dict(cookie_uuid), 200)
def api_create_cookie(user_id, cookiejar_uuid, cookie_dict):
res = api_verify_basic_cookiejar(cookiejar_uuid, user_id)
if res:
return res
if 'name' not in cookie_dict or 'value' not in cookie_dict or cookie_dict['name'] == '':
    return ({'error': 'cookie name or value not provided'}, 400)
res = add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict)
return (res, 200)
#### ####
#### CRAWLER TASK ####
def create_crawler_task(url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
crawler_config = {}
crawler_config['depth_limit'] = depth_limit
crawler_config['closespider_pagecount'] = max_pages
if screenshot:
crawler_config['screenshot'] = True
else:
crawler_config['screenshot'] = False
if har:
crawler_config['har'] = True
else:
crawler_config['har'] = False
if user_agent:
crawler_config['user_agent'] = user_agent
if cookiejar_uuid:
crawler_config['cookiejar_uuid'] = cookiejar_uuid
if auto_crawler:
crawler_mode = 'auto'
else:
crawler_mode = 'manual'
# get crawler_mode
faup.decode(url)
unpack_url = faup.get()
## TODO: # FIXME: remove me
try:
domain = unpack_url['domain'].decode()
except:
domain = unpack_url['domain']
## TODO: # FIXME: remove me
try:
tld = unpack_url['tld'].decode()
except:
tld = unpack_url['tld']
if tld == 'onion':
crawler_type = 'onion'
else:
crawler_type = 'regular'
save_crawler_config(crawler_mode, crawler_type, crawler_config, domain, url=url)
send_url_to_crawl_in_queue(crawler_mode, crawler_type, url)
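# Illustrative usage (hypothetical values): queue a one-shot manual crawl of an onion
# URL with an attached cookiejar; the task ends up in 'onion_crawler_priority_queue':
#   create_crawler_task('http://example.onion', screenshot=True, har=True,
#                       depth_limit=1, max_pages=100,
#                       cookiejar_uuid='<cookiejar_uuid>')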
def save_crawler_config(crawler_mode, crawler_type, crawler_config, domain, url=None):
if crawler_mode == 'manual':
r_cache.set('crawler_config:{}:{}:{}'.format(crawler_mode, crawler_type, domain), json.dumps(crawler_config))
elif crawler_mode == 'auto':
r_serv_onion.set('crawler_config:{}:{}:{}:{}'.format(crawler_mode, crawler_type, domain, url), json.dumps(crawler_config))
def send_url_to_crawl_in_queue(crawler_mode, crawler_type, url):
r_serv_onion.sadd('{}_crawler_priority_queue'.format(crawler_type), '{};{}'.format(url, crawler_mode))
# add auto crawled url for user UI
if crawler_mode == 'auto':
r_serv_onion.sadd('auto_crawler_url:{}'.format(crawler_type), url)
#### ####
#### CRAWLER TASK API ####
def api_create_crawler_task(user_id, url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, cookiejar_uuid=None, user_agent=None):
# validate url
if url is None or url=='' or url=='\n':
return ({'error':'invalid url'}, 400)
if depth_limit:
try:
depth_limit = int(depth_limit)
if depth_limit < 0:
depth_limit = 0
except ValueError:
return ({'error':'invalid depth limit'}, 400)
if max_pages:
try:
max_pages = int(max_pages)
if max_pages < 1:
max_pages = 1
except ValueError:
return ({'error':'invalid max_pages limit'}, 400)
if auto_crawler:
    try:
        crawler_delta = int(crawler_delta)
        if crawler_delta < 0:
            return ({'error':'invalid delta between two passes of the crawler'}, 400)
    except ValueError:
        return ({'error':'invalid delta between two passes of the crawler'}, 400)
if cookiejar_uuid:
if not exist_cookiejar(cookiejar_uuid):
return ({'error': 'unknow cookiejar uuid', 'cookiejar_uuid': cookiejar_uuid}, 404)
level = get_cookiejar_level(cookiejar_uuid)
if level == 0: # # TODO: check if user is admin
cookie_owner = get_cookiejar_owner(cookiejar_uuid)
if cookie_owner != user_id:
return ({'error': 'The access to this cookiejar is restricted'}, 403)
create_crawler_task(url, screenshot=screenshot, har=har, depth_limit=depth_limit, max_pages=max_pages,
auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid, user_agent=user_agent)
return None
#### ####
def is_redirection(domain, last_url):
url = urlparse(last_url)
last_domain = url.netloc
last_domain = last_domain.split('.')
last_domain = '{}.{}'.format(last_domain[-2], last_domain[-1])
return domain != last_domain
# domain up
def create_domain_metadata(domain_type, domain, current_port, date, date_month):
# Add to global set
r_serv_onion.sadd('{}_up:{}'.format(domain_type, date), domain)
r_serv_onion.sadd('full_{}_up'.format(domain_type), domain)
r_serv_onion.sadd('month_{}_up:{}'.format(domain_type, date_month), domain)
# create onion metadata
if not r_serv_onion.exists('{}_metadata:{}'.format(domain_type, domain)):
r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'first_seen', date)
r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'last_check', date)
# Update domain port number
all_domain_ports = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'ports')
if all_domain_ports:
all_domain_ports = all_domain_ports.split(';')
else:
all_domain_ports = []
if current_port not in all_domain_ports:
all_domain_ports.append(current_port)
r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'ports', ';'.join(all_domain_ports))
# add root_item to history
def add_domain_root_item(root_item, domain_type, domain, epoch_date, port):
# Create/Update crawler history
r_serv_onion.zadd('crawler_history_{}:{}:{}'.format(domain_type, domain, port), epoch_date, root_item)
def create_item_metadata(item_id, domain, url, port, item_father):
r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', item_father)
r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'domain', '{}:{}'.format(domain, port))
r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'real_link', url)
# add this item_id to his father
r_serv_metadata.sadd('paste_children:{}'.format(item_father), item_id)
def create_item_id(item_dir, domain):
if len(domain) > 215:
UUID = domain[-215:]+str(uuid.uuid4())
else:
UUID = domain+str(uuid.uuid4())
return os.path.join(item_dir, UUID)
def save_crawled_item(item_id, item_content):
try:
gzipencoded = gzip.compress(item_content.encode())
gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
return gzip64encoded
except:
print("file error: {}".format(item_id))
return False
def save_har(har_dir, item_id, har_content):
if not os.path.exists(har_dir):
os.makedirs(har_dir)
item_id = item_id.split('/')[-1]
filename = os.path.join(har_dir, item_id + '.json')
with open(filename, 'w') as f:
f.write(json.dumps(har_content))
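Taken together, the helpers above form a small pipeline: create a cookiejar, import cookies into it, then hand its uuid to the crawler task API. A minimal sketch of that flow, assuming AIL_BIN/lib is on sys.path; the user id, file name and URL below are hypothetical:

    import crawlers

    user_id = 'admin@admin.test'  # hypothetical user
    cookiejar_uuid = crawlers.create_cookiejar(user_id, level=0, description='forum session')

    # import a browser cookie export (hypothetical file, '* raw' JSON format)
    with open('cookies.json') as f:
        crawlers.api_import_cookies_from_json(f.read(), cookiejar_uuid)

    # queue the crawl; api_create_crawler_task() returns None on success
    res = crawlers.api_create_crawler_task(user_id, 'http://example.onion',
                                           cookiejar_uuid=cookiejar_uuid)
    if res:
        print(res)  # (error_dict, http_status)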


@@ -3,11 +3,8 @@
import os
import sys
-import gzip
-import base64
import uuid
import datetime
-import base64
import redis
import json
import time
@@ -23,15 +20,73 @@ from scrapy import Spider
from scrapy.linkextractors import LinkExtractor
from scrapy.crawler import CrawlerProcess, Crawler
-from scrapy_splash import SplashRequest
+from scrapy_splash import SplashRequest, SplashJsonResponse
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
+#import ConfigLoader
+import Screenshot
+import crawlers
script_cookie = """
function main(splash, args)
-- Default values
splash.js_enabled = true
splash.private_mode_enabled = true
splash.images_enabled = true
splash.webgl_enabled = true
splash.media_source_enabled = true
-- Force enable things
splash.plugins_enabled = true
splash.request_body_enabled = true
splash.response_body_enabled = true
splash.indexeddb_enabled = true
splash.html5_media_enabled = true
splash.http2_enabled = true
-- User defined
splash.resource_timeout = args.resource_timeout
splash.timeout = args.timeout
-- Allow to pass cookies
splash:init_cookies(args.cookies)
-- Run
ok, reason = splash:go{args.url}
if not ok and not reason:find("http") then
return {
error = reason,
last_url = splash:url()
}
end
if reason == "http504" then
splash:set_result_status_code(504)
return ''
end
splash:wait{args.wait}
-- Page instrumentation
-- splash.scroll_position = {y=1000}
splash:wait{args.wait}
-- Response
return {
har = splash:har(),
html = splash:html(),
png = splash:png{render_all=true},
cookies = splash:get_cookies(),
last_url = splash:url()
}
end
"""
class TorSplashCrawler():

    def __init__(self, splash_url, crawler_options):
-       self.process = CrawlerProcess({'LOG_ENABLED': False})
+       self.process = CrawlerProcess({'LOG_ENABLED': True})
        self.crawler = Crawler(self.TorSplashSpider, {
            'USER_AGENT': crawler_options['user_agent'],
            'SPLASH_URL': splash_url,
@@ -39,24 +94,26 @@ class TorSplashCrawler():
            'DOWNLOADER_MIDDLEWARES': {'scrapy_splash.SplashCookiesMiddleware': 723,
                                       'scrapy_splash.SplashMiddleware': 725,
                                       'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
+                                      'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
                                       },
            'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,},
            'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
            'HTTPERROR_ALLOW_ALL': True,
            'RETRY_TIMES': 2,
            'CLOSESPIDER_PAGECOUNT': crawler_options['closespider_pagecount'],
-           'DEPTH_LIMIT': crawler_options['depth_limit']
+           'DEPTH_LIMIT': crawler_options['depth_limit'],
+           'SPLASH_COOKIES_DEBUG': False
            })

-   def crawl(self, type, crawler_options, date, requested_mode, url, domain, port, original_item):
-       self.process.crawl(self.crawler, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, original_item=original_item)
+   def crawl(self, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item):
+       self.process.crawl(self.crawler, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, cookies=cookies, original_item=original_item)
        self.process.start()

    class TorSplashSpider(Spider):
        name = 'TorSplashSpider'

-       def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, original_item, *args, **kwargs):
+       def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item, *args, **kwargs):
-           self.type = type
+           self.domain_type = type
            self.requested_mode = requested_mode
            self.original_item = original_item
            self.root_key = None
@@ -68,166 +125,101 @@ class TorSplashCrawler():
            self.date_month = date['date_month']
            self.date_epoch = int(date['epoch'])

-           # # TODO: timeout in config
-           self.arg_crawler = { 'html': crawler_options['html'],
-                                'wait': 10,
-                                'render_all': 1,
-                                'timeout': 30,
-                                'har': crawler_options['har'],
-                                'png': crawler_options['png']}
+           self.png = crawler_options['png']
+           self.har = crawler_options['har']
+           self.cookies = cookies

            config_section = 'Crawler'
            self.p = Process(config_section)
+           self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str )
+           self.har_dir = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str )

-           self.r_cache = redis.StrictRedis(
-               host=self.p.config.get("Redis_Cache", "host"),
-               port=self.p.config.getint("Redis_Cache", "port"),
-               db=self.p.config.getint("Redis_Cache", "db"),
-               decode_responses=True)

            self.r_serv_log_submit = redis.StrictRedis(
                host=self.p.config.get("Redis_Log_submit", "host"),
                port=self.p.config.getint("Redis_Log_submit", "port"),
                db=self.p.config.getint("Redis_Log_submit", "db"),
                decode_responses=True)

-           self.r_serv_metadata = redis.StrictRedis(
-               host=self.p.config.get("ARDB_Metadata", "host"),
-               port=self.p.config.getint("ARDB_Metadata", "port"),
-               db=self.p.config.getint("ARDB_Metadata", "db"),
-               decode_responses=True)
-           self.r_serv_onion = redis.StrictRedis(
-               host=self.p.config.get("ARDB_Onion", "host"),
-               port=self.p.config.getint("ARDB_Onion", "port"),
-               db=self.p.config.getint("ARDB_Onion", "db"),
-               decode_responses=True)
-           self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date_str )
-           self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
-                                           self.p.config.get("Directories", "crawled"), date_str )
-           self.crawled_har = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str )
-           self.crawled_screenshot = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot") )
+           self.root_key = None

+       def build_request_arg(self, cookies):
+           return {'wait': 10,
+                   'resource_timeout': 30, # /!\ Weird behaviour if timeout < resource_timeout /!\
+                   'timeout': 30,
+                   'cookies': cookies,
+                   'lua_source': script_cookie
+                  }
        def start_requests(self):
+           l_cookies = self.build_request_arg(self.cookies)
            yield SplashRequest(
                self.start_urls,
                self.parse,
                errback=self.errback_catcher,
-               endpoint='render.json',
-               meta={'father': self.original_item, 'root_key': None},
-               args=self.arg_crawler
+               endpoint='execute',
+               meta={'father': self.original_item},
+               args=l_cookies
            )
# # TODO: remove duplicate and anchor
        def parse(self,response):
            #print(response.headers)
            #print(response.status)
            if response.status == 504:
-               # down ?
-               print('504 detected')
+               # no response
+               #print('504 detected')
                pass

+           # LUA ERROR # # TODO: print/display errors
+           elif 'error' in response.data:
+               if(response.data['error'] == 'network99'):
+                   print('Connection to proxy refused')
+               else:
+                   print(response.data['error'])

            elif response.status != 200:
                print('other response: {}'.format(response.status))
-               #print(error_log)
-               #detect connection to proxy refused
+               # detect connection to proxy refused
                error_log = (json.loads(response.body.decode()))
-               if(error_log['info']['text'] == 'Connection to proxy refused'):
-                   print('Connection to proxy refused')
+               print(error_log)

+           #elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
+           #    pass # ignore response
            else:
-               #avoid filename too big
-               if len(self.domains[0]) > 215:
-                   UUID = self.domains[0][-215:]+str(uuid.uuid4())
-               else:
-                   UUID = self.domains[0]+str(uuid.uuid4())
-               filename_paste_full = os.path.join(self.crawled_paste_filemame, UUID)
-               relative_filename_paste = os.path.join(self.crawler_path, UUID)
-               filename_har = os.path.join(self.crawled_har, UUID)
-               # # TODO: modify me
-               # save new paste on disk
-               if self.save_crawled_paste(relative_filename_paste, response.data['html']):
-                   # add this paste to the domain crawled set # TODO: # FIXME: put this on cache ?
-                   #self.r_serv_onion.sadd('temp:crawled_domain_pastes:{}'.format(self.domains[0]), filename_paste)
-                   self.r_serv_onion.sadd('{}_up:{}'.format(self.type, self.full_date), self.domains[0])
-                   self.r_serv_onion.sadd('full_{}_up'.format(self.type), self.domains[0])
-                   self.r_serv_onion.sadd('month_{}_up:{}'.format(self.type, self.date_month), self.domains[0])
-                   # create onion metadata
-                   if not self.r_serv_onion.exists('{}_metadata:{}'.format(self.type, self.domains[0])):
-                       self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'first_seen', self.full_date)
-                   # create root_key
-                   if self.root_key is None:
-                       self.root_key = relative_filename_paste
-                       # Create/Update crawler history
-                       self.r_serv_onion.zadd('crawler_history_{}:{}:{}'.format(self.type, self.domains[0], self.port), self.date_epoch, self.root_key)
-                   # Update domain port number
-                   all_domain_ports = self.r_serv_onion.hget('{}_metadata:{}'.format(self.type, self.domains[0]), 'ports')
-                   if all_domain_ports:
-                       all_domain_ports = all_domain_ports.split(';')
-                   else:
-                       all_domain_ports = []
-                   if self.port not in all_domain_ports:
-                       all_domain_ports.append(self.port)
-                       self.r_serv_onion.hset('{}_metadata:{}'.format(self.type, self.domains[0]), 'ports', ';'.join(all_domain_ports))
-                   #create paste metadata
-                   self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'super_father', self.root_key)
-                   self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'father', response.meta['father'])
-                   self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'domain', '{}:{}'.format(self.domains[0], self.port))
-                   self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'real_link', response.url)
-                   self.r_serv_metadata.sadd('paste_children:'+response.meta['father'], relative_filename_paste)
-                   if 'png' in response.data:
-                       size_screenshot = (len(response.data['png'])*3) /4
-                       if size_screenshot < 5000000 or self.requested_mode: #bytes or manual/auto
-                           image_content = base64.standard_b64decode(response.data['png'].encode())
-                           hash = sha256(image_content).hexdigest()
-                           img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12])
-                           filename_img = os.path.join(self.crawled_screenshot, 'screenshot', img_dir_path, hash[12:] +'.png')
-                           dirname = os.path.dirname(filename_img)
-                           if not os.path.exists(dirname):
-                               os.makedirs(dirname)
-                           if not os.path.exists(filename_img):
-                               with open(filename_img, 'wb') as f:
-                                   f.write(image_content)
-                           # add item metadata
-                           self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'screenshot', hash)
-                           # add sha256 metadata
-                           self.r_serv_onion.sadd('screenshot:{}'.format(hash), relative_filename_paste)
-                           # domain map
-                           self.r_serv_onion.sadd('domain_screenshot:{}'.format(self.domains[0]), hash)
-                           self.r_serv_onion.sadd('screenshot_domain:{}'.format(hash), self.domains[0])
-                   if 'har' in response.data:
-                       dirname = os.path.dirname(filename_har)
-                       if not os.path.exists(dirname):
-                           os.makedirs(dirname)
-                       with open(filename_har+'.json', 'wb') as f:
-                           f.write(json.dumps(response.data['har']).encode())
-                   # save external links in set
-                   #lext = LinkExtractor(deny_domains=self.domains, unique=True)
-                   #for link in lext.extract_links(response):
-                   #    self.r_serv_onion.sadd('domain_{}_external_links:{}'.format(self.type, self.domains[0]), link.url)
-                   #    self.r_serv_metadata.sadd('paste_{}_external_links:{}'.format(self.type, filename_paste), link.url)
-                   le = LinkExtractor(allow_domains=self.domains, unique=True)
-                   for link in le.extract_links(response):
-                       yield SplashRequest(
-                           link.url,
-                           self.parse,
-                           errback=self.errback_catcher,
-                           endpoint='render.json',
-                           meta={'father': relative_filename_paste, 'root_key': response.meta['root_key']},
-                           args=self.arg_crawler
-                       )
+               item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
+               self.save_crawled_item(item_id, response.data['html'])
+               crawlers.create_item_metadata(item_id, self.domains[0], response.data['last_url'], self.port, response.meta['father'])
+               if self.root_key is None:
+                   self.root_key = item_id
+                   crawlers.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
+                   crawlers.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
+               if 'cookies' in response.data:
+                   all_cookies = response.data['cookies']
+               else:
+                   all_cookies = []
+               # SCREENSHOT
+               if 'png' in response.data:
+                   sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
+                   if sha256_string:
+                       Screenshot.save_item_relationship(sha256_string, item_id)
+                       Screenshot.save_domain_relationship(sha256_string, self.domains[0])
+               # HAR
+               if 'har' in response.data:
+                   crawlers.save_har(self.har_dir, item_id, response.data['har'])
+               le = LinkExtractor(allow_domains=self.domains, unique=True)
+               for link in le.extract_links(response):
+                   l_cookies = self.build_request_arg(all_cookies)
+                   yield SplashRequest(
+                       link.url,
+                       self.parse,
+                       errback=self.errback_catcher,
+                       endpoint='execute',
+                       meta={'father': item_id},
+                       args=l_cookies
+                   )
        def errback_catcher(self, failure):
            # catch all errback failures,
@@ -235,7 +227,7 @@ class TorSplashCrawler():
            if failure.check(ResponseNeverReceived):
                request = failure.request
-               url = request.meta['splash']['args']['url']
+               url = response.data['last_url']
                father = request.meta['father']

                self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
@@ -248,62 +240,28 @@ class TorSplashCrawler():
                    url,
                    self.parse,
                    errback=self.errback_catcher,
-                   endpoint='render.json',
-                   meta={'father': father, 'root_key': response.meta['root_key']},
-                   args=self.arg_crawler
+                   endpoint='execute',
+                   cache_args=['lua_source'],
+                   meta={'father': father},
+                   args=self.build_request_arg(response.cookiejar)
                )
            else:
                print('failure')
                #print(failure)
                print(failure.type)
-               #print(failure.request.meta['item'])
-           '''
-           #if isinstance(failure.value, HttpError):
-           elif failure.check(HttpError):
-               # you can get the response
-               response = failure.value.response
-               print('HttpError')
-               self.logger.error('HttpError on %s', response.url)
-           #elif isinstance(failure.value, DNSLookupError):
-           elif failure.check(DNSLookupError):
-               # this is the original request
-               request = failure.request
-               print(DNSLookupError)
-               print('DNSLookupError')
-               self.logger.error('DNSLookupError on %s', request.url)
-           #elif isinstance(failure.value, TimeoutError):
-           elif failure.check(TimeoutError):
-               request = failure.request
-               print('TimeoutError')
-               print(TimeoutError)
-               self.logger.error('TimeoutError on %s', request.url)
-           '''

-       def save_crawled_paste(self, filename, content):
-           if os.path.isfile(filename):
-               print('File: {} already exist in submitted pastes'.format(filename))
-               return False
-           try:
-               gzipencoded = gzip.compress(content.encode())
-               gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
-           except:
-               print("file error: {}".format(filename))
-               return False
+       def save_crawled_item(self, item_id, item_content):
+           gzip64encoded = crawlers.save_crawled_item(item_id, item_content)

+           # Send item to queue
            # send paste to Global
-           relay_message = "{0} {1}".format(filename, gzip64encoded)
+           relay_message = "{0} {1}".format(item_id, gzip64encoded)
            self.p.populate_set_out(relay_message, 'Mixer')

            # increase nb of paste by feeder name
            self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", "crawler", 1)

            # tag crawled paste
-           msg = 'infoleak:submission="crawler";{}'.format(filename)
+           msg = 'infoleak:submission="crawler";{}'.format(item_id)
            self.p.populate_set_out(msg, 'Tags')
-           return True


@@ -9,6 +9,7 @@ from TorSplashCrawler import TorSplashCrawler
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
+import crawlers

if __name__ == '__main__':
@@ -37,7 +38,12 @@ if __name__ == '__main__':
    date = crawler_json['date']
    requested_mode = crawler_json['requested']

+   if crawler_options['cookiejar_uuid']:
+       cookies = crawlers.load_crawler_cookies(crawler_options['cookiejar_uuid'], domain, crawler_type=service_type)
+   else:
+       cookies = []

    redis_cache.delete('crawler_request:{}'.format(uuid))

    crawler = TorSplashCrawler(splash_url, crawler_options)
-   crawler.crawl(service_type, crawler_options, date, requested_mode, url, domain, port, original_item)
+   crawler.crawl(service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item)

(6 binary image files added, not shown: 31 KiB, 110 KiB, 39 KiB, 190 KiB, 78 KiB, 47 KiB)


@@ -10,7 +10,7 @@ import sys
import json
import random

-from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
+from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response
from flask_login import login_required, current_user, login_user, logout_user

sys.path.append('modules')
@@ -25,6 +25,7 @@ import Tag

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Domain
+import crawlers

r_cache = Flask_config.r_cache
r_serv_db = Flask_config.r_serv_db
@@ -43,7 +44,47 @@ def api_validator(api_response):
    if api_response:
        return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1]
def create_json_response(data, status_code):
return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
# ============= ROUTES ==============
@crawler_splash.route("/crawlers/manual", methods=['GET'])
@login_required
@login_read_only
def manual():
user_id = current_user.get_id()
l_cookiejar = crawlers.api_get_cookies_list_select(user_id)
return render_template("crawler_manual.html", crawler_enabled=True, l_cookiejar=l_cookiejar)
@crawler_splash.route("/crawlers/send_to_spider", methods=['POST'])
@login_required
@login_analyst
def send_to_spider():
user_id = current_user.get_id()
# POST val
url = request.form.get('url_to_crawl')
auto_crawler = request.form.get('crawler_type')
crawler_delta = request.form.get('crawler_epoch')
screenshot = request.form.get('screenshot')
har = request.form.get('har')
depth_limit = request.form.get('depth_limit')
max_pages = request.form.get('max_pages')
cookiejar_uuid = request.form.get('cookiejar')
if cookiejar_uuid:
if cookiejar_uuid == 'None':
cookiejar_uuid = None
else:
cookiejar_uuid = cookiejar_uuid.rsplit(':')
cookiejar_uuid = cookiejar_uuid[-1].replace(' ', '')
res = crawlers.api_create_crawler_task(user_id, url, screenshot=screenshot, har=har, depth_limit=depth_limit, max_pages=max_pages,
auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid)
if res:
return create_json_response(res[0], res[1])
return redirect(url_for('crawler_splash.manual'))
# add route : /crawlers/show_domain
@crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST'])
@login_required
@@ -156,3 +197,210 @@ def domains_explorer_web():
    dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
    return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')
## Cookiejar ##
@crawler_splash.route('/crawler/cookiejar/add', methods=['GET'])
@login_required
@login_analyst
def crawler_cookiejar_add():
return render_template("add_cookiejar.html")
@crawler_splash.route('/crawler/cookiejar/add_post', methods=['POST'])
@login_required
@login_analyst
def crawler_cookiejar_add_post():
user_id = current_user.get_id()
description = request.form.get('description')
level = request.form.get('level')
if level:
level = 1
else:
level = 0
if 'file' in request.files:
file = request.files['file']
json_cookies = file.read().decode()
else:
json_cookies = None
# Get cookies to add
l_manual_cookie = []
l_invalid_cookie = []
for obj_tuple in list(request.form):
l_input = request.form.getlist(obj_tuple)
if len(l_input) == 2:
if l_input[0]: # cookie_name
cookie_dict = {'name': l_input[0], 'value': l_input[1]}
l_manual_cookie.append(cookie_dict)
elif l_input[1]: # cookie_value
l_invalid_cookie.append({'name': '', 'value': l_input[1]})
if l_invalid_cookie:
return create_json_response({'error': 'invalid cookie', 'invalid fields': l_invalid_cookie}, 400)
cookiejar_uuid = crawlers.create_cookiejar(user_id, level=level, description=description)
if json_cookies:
res = crawlers.api_import_cookies_from_json(json_cookies, cookiejar_uuid)
if res:
return create_json_response(res[0], res[1])
if l_manual_cookie:
crawlers.add_cookies_to_cookiejar(cookiejar_uuid, l_manual_cookie)
return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
@crawler_splash.route('/crawler/cookiejar/all', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookiejar_all():
user_id = current_user.get_id()
user_cookiejar = crawlers.get_cookiejar_metadata_by_iterator(crawlers.get_user_cookiejar(user_id))
global_cookiejar = crawlers.get_cookiejar_metadata_by_iterator(crawlers.get_global_cookiejar())
return render_template("all_cookiejar.html", user_cookiejar=user_cookiejar, global_cookiejar=global_cookiejar)
@crawler_splash.route('/crawler/cookiejar/show', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookiejar_show():
user_id = current_user.get_id()
cookiejar_uuid = request.args.get('cookiejar_uuid')
res = crawlers.api_get_cookiejar_cookies_with_uuid(cookiejar_uuid, user_id)
if res[1] !=200:
return create_json_response(res[0], res[1])
cookiejar_metadata = crawlers.get_cookiejar_metadata(cookiejar_uuid, level=False)
l_cookies = []
l_cookie_uuid = []
for cookie in res[0]:
l_cookies.append(json.dumps(cookie[0], indent=4, sort_keys=True))
l_cookie_uuid.append(cookie[1])
return render_template("show_cookiejar.html", cookiejar_uuid=cookiejar_uuid, cookiejar_metadata=cookiejar_metadata,
l_cookies=l_cookies, l_cookie_uuid=l_cookie_uuid)
@crawler_splash.route('/crawler/cookiejar/cookie/delete', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookiejar_cookie_delete():
user_id = current_user.get_id()
cookiejar_uuid = request.args.get('cookiejar_uuid')
cookie_uuid = request.args.get('cookie_uuid')
res = crawlers.api_delete_cookie_from_cookiejar(user_id, cookiejar_uuid, cookie_uuid)
if res[1] !=200:
return create_json_response(res[0], res[1])
return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
@crawler_splash.route('/crawler/cookiejar/delete', methods=['GET'])
#@login_required
#@login_read_only
def crawler_cookiejar_delete():
user_id = current_user.get_id()
cookiejar_uuid = request.args.get('cookiejar_uuid')
res = crawlers.api_delete_cookie_jar(user_id, cookiejar_uuid)
if res[1] !=200:
return create_json_response(res[0], res[1])
return redirect(url_for('crawler_splash.crawler_cookiejar_all'))
@crawler_splash.route('/crawler/cookiejar/edit', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_edit():
user_id = current_user.get_id()
cookiejar_uuid = request.args.get('cookiejar_uuid')
description = request.args.get('description')
res = crawlers.api_edit_cookiejar_description(user_id, cookiejar_uuid, description)
return create_json_response(res[0], res[1])
@crawler_splash.route('/crawler/cookiejar/cookie/edit', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_edit():
user_id = current_user.get_id()
cookiejar_uuid = request.args.get('cookiejar_uuid')
cookie_uuid = request.args.get('cookie_uuid')
cookie_dict = crawlers.get_cookie_dict(cookie_uuid)
return render_template("edit_cookie.html", cookiejar_uuid=cookiejar_uuid, cookie_uuid=cookie_uuid, cookie_dict=cookie_dict)
@crawler_splash.route('/crawler/cookiejar/cookie/edit_post', methods=['POST'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_edit_post():
user_id = current_user.get_id()
cookiejar_uuid = request.form.get('cookiejar_uuid')
cookie_uuid = request.form.get('cookie_uuid')
name = request.form.get('name')
value = request.form.get('value')
domain = request.form.get('domain')
path = request.form.get('path')
httpOnly = request.form.get('httpOnly')
secure = request.form.get('secure')
cookie_dict = {'name': name, 'value': value}
if domain:
cookie_dict['domain'] = domain
if path:
cookie_dict['path'] = path
if httpOnly:
cookie_dict['httpOnly'] = True
if secure:
cookie_dict['secure'] = True
res = crawlers.api_edit_cookie(user_id, cookiejar_uuid, cookie_uuid, cookie_dict)
if res[1] != 200:
return create_json_response(res[0], res[1])
return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
@crawler_splash.route('/crawler/cookiejar/cookie/add', methods=['GET'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_add():
user_id = current_user.get_id()
cookiejar_uuid = request.args.get('cookiejar_uuid')
return render_template("add_cookie.html", cookiejar_uuid=cookiejar_uuid)
@crawler_splash.route('/crawler/cookiejar/cookie/manual_add_post', methods=['POST'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_manual_add_post():
user_id = current_user.get_id()
cookiejar_uuid = request.form.get('cookiejar_uuid')
name = request.form.get('name')
value = request.form.get('value')
domain = request.form.get('domain')
path = request.form.get('path')
httpOnly = request.form.get('httpOnly')
secure = request.form.get('secure')
cookie_dict = {'name': name, 'value': value}
if domain:
cookie_dict['domain'] = domain
if path:
cookie_dict['path'] = path
if httpOnly:
cookie_dict['httpOnly'] = True
if secure:
cookie_dict['secure'] = True
return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
@crawler_splash.route('/crawler/cookiejar/cookie/json_add_post', methods=['POST'])
@login_required
@login_read_only
def crawler_cookiejar_cookie_json_add_post():
user_id = current_user.get_id()
cookiejar_uuid = request.form.get('cookiejar_uuid')
if 'file' in request.files:
file = request.files['file']
json_cookies = file.read().decode()
if json_cookies:
res = crawlers.api_import_cookies_from_json(json_cookies, cookiejar_uuid)
return redirect(url_for('crawler_splash.crawler_cookiejar_show', cookiejar_uuid=cookiejar_uuid))
return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid))
## - - ##


@@ -30,6 +30,9 @@ r_serv_metadata = Flask_config.r_serv_metadata
crawler_enabled = Flask_config.crawler_enabled
bootstrap_label = Flask_config.bootstrap_label

+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
+import crawlers

hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')

faup = Faup()

@@ -214,18 +217,6 @@ def get_crawler_splash_status(type):
    return crawler_metadata
def create_crawler_config(mode, service_type, crawler_config, domain, url=None):
if mode == 'manual':
r_cache.set('crawler_config:{}:{}:{}'.format(mode, service_type, domain), json.dumps(crawler_config))
elif mode == 'auto':
r_serv_onion.set('crawler_config:{}:{}:{}:{}'.format(mode, service_type, domain, url), json.dumps(crawler_config))
def send_url_to_crawl_in_queue(mode, service_type, url):
r_serv_onion.sadd('{}_crawler_priority_queue'.format(service_type), '{};{}'.format(url, mode))
# add auto crawled url for user UI
if mode == 'auto':
r_serv_onion.sadd('auto_crawler_url:{}'.format(service_type), url)
def delete_auto_crawler(url):
    domain = get_domain_from_url(url)
    type = get_type_domain(domain)
@@ -257,12 +248,6 @@ def dashboard():
        crawler_metadata_regular=crawler_metadata_regular,
        statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)
@hiddenServices.route("/crawlers/manual", methods=['GET'])
@login_required
@login_read_only
def manual():
return render_template("Crawler_Splash_manual.html", crawler_enabled=crawler_enabled)
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET']) @hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
@login_required @login_required
@login_read_only @login_read_only
@ -389,94 +374,6 @@ def unblacklist_domain():
else: else:
return 'Incorrect type' return 'Incorrect type'
@hiddenServices.route("/crawlers/create_spider_splash", methods=['POST'])
@login_required
@login_analyst
def create_spider_splash():
url = request.form.get('url_to_crawl')
automatic = request.form.get('crawler_type')
crawler_time = request.form.get('crawler_epoch')
#html = request.form.get('html_content_id')
screenshot = request.form.get('screenshot')
har = request.form.get('har')
depth_limit = request.form.get('depth_limit')
max_pages = request.form.get('max_pages')
# validate url
if url is None or url=='' or url=='\n':
return 'incorrect url'
crawler_config = {}
# verify user input
if automatic:
automatic = True
else:
automatic = False
if not screenshot:
crawler_config['png'] = 0
if not har:
crawler_config['har'] = 0
# verify user input
if depth_limit:
try:
depth_limit = int(depth_limit)
if depth_limit < 0:
return 'incorrect depth_limit'
else:
crawler_config['depth_limit'] = depth_limit
except:
return 'incorrect depth_limit'
if max_pages:
try:
max_pages = int(max_pages)
if max_pages < 1:
return 'incorrect max_pages'
else:
crawler_config['closespider_pagecount'] = max_pages
except:
return 'incorrect max_pages'
# get service_type
faup.decode(url)
unpack_url = faup.get()
## TODO: # FIXME: remove me
try:
domain = unpack_url['domain'].decode()
except:
domain = unpack_url['domain']
## TODO: # FIXME: remove me
try:
tld = unpack_url['tld'].decode()
except:
tld = unpack_url['tld']
if tld == 'onion':
service_type = 'onion'
else:
service_type = 'regular'
if automatic:
mode = 'auto'
try:
crawler_time = int(crawler_time)
if crawler_time < 0:
return 'incorrect epoch'
else:
crawler_config['time'] = crawler_time
except:
return 'incorrect epoch'
else:
mode = 'manual'
epoch = None
create_crawler_config(mode, service_type, crawler_config, domain, url=url)
send_url_to_crawl_in_queue(mode, service_type, url)
return redirect(url_for('hiddenServices.manual'))
@hiddenServices.route("/crawlers/auto_crawler", methods=['GET']) @hiddenServices.route("/crawlers/auto_crawler", methods=['GET'])
@login_required @login_required
@login_read_only @login_read_only


@@ -0,0 +1,116 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL - Add Cookies</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card mb-3 mt-1">
<div class="card-header text-white bg-dark">
<div class="row">
<div class="col-8">
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Add Cookie to cookiejar: {{cookiejar_uuid}}</h5>
</div>
<div class="col-4">
<a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{cookie_uuid}}">
<i class="fas fa-trash-alt"></i>
</a>
</div>
</div>
</div>
<div class="card-body">
<form action="{{ url_for('crawler_splash.crawler_cookiejar_cookie_manual_add_post') }}" method="post" enctype="multipart/form-data">
<input type="text" name="cookiejar_uuid" value="{{cookiejar_uuid}}" hidden>
{% include 'crawler/crawler_splash/cookie_edit_block.html' %}
<div class="form-group">
<button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie"></i> Create Cookie</button>
</div>
</form>
<hr>
<form action="{{ url_for('crawler_splash.crawler_cookiejar_cookie_json_add_post') }}" method="post" enctype="multipart/form-data">
<input type="text" name="cookiejar_uuid" value="{{cookiejar_uuid}}" hidden>
<h5>Import cookies from file:</h5>
<div class="form-group">
<label for="file"><b>JSON File</b></label>
<input type="file" class="form-control-file btn btn-outline-secondary" id="file" name="file">
</div>
<div class="form-group">
<button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie"></i> Import Cookies</button>
</div>
</form>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
$(document).ready(function(){
$('#description-edit-block').hide();
$("#page-crawler").addClass("active");
$("#nav_title_cookiejar").removeClass("text-muted");
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
function show_edit_description(){
console.log('edit');
$('#description-edit-block').show();
}
function edit_description(){
var new_description = $('#input-description').val()
var data_to_send = { cookiejar_uuid: "{{cookiejar_uuid}}", "description": new_description}
$.get("{{ url_for('crawler_splash.crawler_cookiejar_edit') }}", data_to_send, function(data, status){
if(status == "success") {
$('#description-text').text(new_description)
$('#description-edit-block').hide();
}
});
}
</script>
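The edit_description() helper above sends the new description as plain GET query parameters. For reference, an equivalent call from Python with illustrative values; the host, port and '/crawler/cookiejar/edit' path are assumptions here, the real path is whatever url_for('crawler_splash.crawler_cookiejar_edit') resolves to:

import requests

# Illustrative only: mirrors the $.get() call in the template above.
requests.get('https://127.0.0.1:7000/crawler/cookiejar/edit',
             params={'cookiejar_uuid': 'f1b2c3d4-aaaa-bbbb-cccc-000000000000',
                     'description': 'forum accounts'},
             verify=False)   # only needed on instances with a self-signed certificate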
@@ -0,0 +1,99 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL - Add Cookies</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card mb-3 mt-1">
<div class="card-header text-white bg-dark">
<h5 class="card-title"><i class="fas fa-box"></i> Create Cookiejar <i class="fas fa-cookie"></i></h5>
</div>
<div class="card-body">
<form action="{{ url_for('crawler_splash.crawler_cookiejar_add_post') }}" method="post" enctype="multipart/form-data">
<div class="row">
<div class="col-12 col-md-9">
<div class="input-group mb-2 mr-sm-2">
<div class="input-group-prepend">
<div class="input-group-text"><i class="fas fa-tag"></i></div>
</div>
<input id="description" name="description" class="form-control" placeholder="cookies description - (optional)" type="text">
</div>
</div>
<div class="col-12 col-md-3">
<div class="custom-control custom-switch mt-1">
<input class="custom-control-input" type="checkbox" name="level" id="id_level" checked="">
<label class="custom-control-label" for="id_level">
<i class="fas fa-users"></i>&nbsp;Show cookiejar to all Users
</label>
</div>
</div>
</div>
<hr>
{% include 'crawler/crawler_splash/add_cookies_block.html' %}
<div class="form-group">
<button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie-bite"></i> Create Cookiejar</button>
</div>
</form>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
var chart = {};
$(document).ready(function(){
$("#page-crawler").addClass("active");
$("#nav_cookiejar_add").addClass("active");
$("#nav_title_cookiejar").removeClass("text-muted");
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
@@ -0,0 +1,58 @@
<h5>Import cookies:</h5>
<div class="form-group">
<label for="file"><b>JSON File</b></label>
<input type="file" class="form-control-file btn btn-outline-secondary" id="file" name="file">
</div>
<hr>
<div>
<h5>Create cookies:</h5>
<div class="row">
<div class="col-5" for="obj_input_cookie_name"><b>Cookie Name</b></div>
<div class="col-6" for="obj_input_cookie_value"><b>Cookie Value</b></div>
</div>
<div class="form-horizontal">
<div class="form-body">
<div class="form-group">
<div class="fields">
<div class="input-group mb-1">
<input type="text" class="form-control col-5" name="first_cookie" id="obj_input_cookie_name">
<input type="text" class="form-control col-6" name="first_cookie" id="obj_input_cookie_value">
<span class="btn btn-info input-group-addon add-field col-1"><i class="fas fa-plus"></i></span>
</div>
<br>
<span class="help-block" hidden>Manual Cookies</span>
</div>
</div>
</div>
</div>
</div>
<script>
var input_1 = '<div class="input-group mb-1"><input type="text" class="form-control col-5" name="'
var input_2 = '"><input type="text" class="form-control col-6" name="'
var input_3 = '">';
var minusButton = '<span class="btn btn-danger input-group-addon delete-field col-1"><i class="fas fa-trash-alt"></i></span></div>';
$('.add-field').click(function() {
var new_uuid = uuidv4();
var template = input_1 + new_uuid + input_2 + new_uuid + input_3;
var temp = $(template).insertBefore('.help-block');
temp.append(minusButton);
});
$('.fields').on('click', '.delete-field', function(){
$(this).parent().remove();
});
function uuidv4() {
return ([1e7]+-1e3+-4e3+-8e3+-1e11).replace(/[018]/g, c =>
(c ^ crypto.getRandomValues(new Uint8Array(1))[0] & 15 >> c / 4).toString(16)
);
}
</script>
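The script above gives every manually added row a fresh UUID as its field name, with the name and value inputs of that row sharing the UUID ('first_cookie' for the initial row). A hypothetical sketch of how a handler could pair them back up server-side; the real handler is not shown in this excerpt:

# Hypothetical helper, not part of this PR's code: rebuild (name, value)
# pairs from a werkzeug MultiDict such as request.form.
def cookies_from_form(form, ignore=('file', 'description', 'level', 'cookiejar_uuid')):
    cookies = []
    for field in form.keys():
        if field in ignore:
            continue
        pair = form.getlist(field)       # [cookie name, cookie value], in DOM order
        if len(pair) == 2 and pair[0]:   # skip empty rows
            cookies.append({'name': pair[0], 'value': pair[1]})
    return cookies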
@@ -0,0 +1,99 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AIL - Cookies</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card mb-3 mt-1">
<div class="card-header text-white bg-dark">
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Your Cookiejar</h5>
</div>
<div class="card-body">
{% with all_cookiejar=user_cookiejar, table_id='table_user'%}
{% include 'crawler/crawler_splash/table_cookiejar.html' %}
{% endwith %}
</div>
</div>
<div class="card mb-3 mt-1">
<div class="card-header text-white bg-dark">
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Global Cookiejar</h5>
</div>
<div class="card-body">
{% with all_cookiejar=global_cookiejar, table_id='table_global'%}
{% include 'crawler/crawler_splash/table_cookiejar.html' %}
{% endwith %}
</div>
</div>
<a class="btn btn-info my-4" href="{{url_for('crawler_splash.crawler_cookiejar_add')}}">
<i class="fas fa-plus-circle ml-auto"></i>
Create Cookiejar
</a>
</div>
</div>
</div>
<script>
$(document).ready(function(){
$("#page-crawler").addClass("active");
$("#nav_cookiejar_all").addClass("active");
$("#nav_title_cookiejar").removeClass("text-muted");
$('#table_user').DataTable({
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
"iDisplayLength": 10,
"order": [[ 0, "desc" ]]
});
$('#table_global').DataTable({
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
"iDisplayLength": 10,
"order": [[ 0, "desc" ]]
});
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
</body>
</html>
@@ -0,0 +1,37 @@
<div class="form-group row">
<label for="name" class="col-sm-2 col-form-label">name</label>
<div class="col-sm-10">
<input type="text" class="form-control" id="name" name="name" placeholder="cookie name" value="{%if 'name' in cookie_dict%}{{cookie_dict['name']}}{%endif%}" required>
</div>
</div>
<div class="form-group row">
<label for="value" class="col-sm-2 col-form-label">value</label>
<div class="col-sm-10">
<input type="text" class="form-control" id="value" name="value" placeholder="cookie value" value="{%if 'value' in cookie_dict%}{{cookie_dict['value']}}{%endif%}" required>
</div>
</div>
<div class="form-group row">
<label for="domain" class="col-sm-2 col-form-label">domain</label>
<div class="col-sm-10">
<input type="text" class="form-control" id="domain" name="domain" placeholder=".domain - optional" value="{%if 'domain' in cookie_dict%}{{cookie_dict['domain']}}{%endif%}">
</div>
</div>
<div class="form-group row">
<label for="path" class="col-sm-2 col-form-label">path</label>
<div class="col-sm-10">
<input type="text" class="form-control" id="path" name="path" placeholder="cookie path - optional" value="{%if 'path' in cookie_dict%}{{cookie_dict['path']}}{%endif%}">
</div>
</div>
<div class="custom-control custom-switch mt-1">
<input class="custom-control-input" type="checkbox" name="httpOnly" id="httpOnly" {%if 'httpOnly' in cookie_dict%}{%if cookie_dict['httpOnly']%}checked=""{%endif%}{%endif%}>
<label class="custom-control-label" for="httpOnly">
httpOnly
</label>
</div>
<div class="custom-control custom-switch mt-1">
<input class="custom-control-input" type="checkbox" name="secure" id="secure" {%if 'secure' in cookie_dict%}{%if cookie_dict['secure']%}checked=""{%endif%}{%endif%}>
<label class="custom-control-label" for="secure">
secure
</label>
</div>
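For reference, the cookie_dict this block renders mirrors the six inputs above; a representative value (contents illustrative):

cookie_dict = {'name': 'session_id',          # required
               'value': '2a9e5f2b41c6',       # required
               'domain': '.example.onion',    # optional
               'path': '/',                   # optional
               'httpOnly': True,              # optional switch
               'secure': False}               # optional switch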
@@ -0,0 +1,36 @@
{% for dict_cookie in l_elem %}
{% if loop.index0 % 4 == 0 %}
<div class="card-deck mt-3">
{% endif %}
<div class="card">
<div class="card-header py-0">
<div class="d-flex flex-row-reverse">
<div>
<a class="btn btn-light" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_edit') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{l_cookie_uuid[loop.index0]}}" style="font-size: 15px">
<i class="text-secondary fas fa-pencil-alt"></i>
</a>
</div>
<div>
<a class="btn btn-light" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{l_cookie_uuid[loop.index0]}}" style="font-size: 15px">
<i class="text-danger fas fa-trash-alt"></i>
</a>
</div>
</div>
</div>
<div class="card-body">
<pre>{{dict_cookie}}</pre>
</div>
</div>
{% if loop.index0 % 4 == 3 %}
</div>
{% endif %}
{% endfor %}
{% if l_elem|length % 4 != 0 %}
</div>
{% endif %}
@@ -38,7 +38,7 @@
</div>
<div class="card-body">
<p class="card-text">Enter a domain and choose what kind of data you want.</p>
-<form action="{{ url_for('hiddenServices.create_spider_splash') }}" method='post'>
+<form action="{{ url_for('crawler_splash.send_to_spider') }}" method='post'>
<div class="row">
<div class="col-12 col-lg-6">
<div class="input-group" id="date-range-from">
@@ -108,8 +108,18 @@
</div>
</div>
</div>
</div>
<div class="mt-1">
<i class="mt-2 text-white fas fa-cookie-bite"></i> Cookiejar:
<select class="custom-select form-control mt-1" name="cookiejar" id="cookiejar">
<option value="None" selected>Don't use any cookiejar</option>
{%for cookiejar in l_cookiejar%}
<option value="{{cookiejar}}">{{cookiejar}}</option>
{%endfor%}
</select>
</div>
</div>
</div>
<button class="btn btn-primary mt-2">
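The cookiejar dropdown added above posts the literal string 'None' when no cookiejar is selected. A minimal sketch, assuming the receiving route normalises that value before it lands in the crawler config's 'cookiejar_uuid' field (send_to_spider itself is not part of this excerpt):

# Sketch only: turn the posted 'cookiejar' form value into a UUID or None.
def get_selected_cookiejar(form):
    value = form.get('cookiejar')
    if not value or value == 'None':
        return None                # "Don't use any cookiejar"
    return value                   # cookiejar UUID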
@@ -0,0 +1,102 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL - Edit Cookie</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card mb-3 mt-1">
<div class="card-header text-white bg-dark">
<div class="row">
<div class="col-8">
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Edit Cookie: {{cookie_uuid}}</h5>
</div>
<div class="col-4">
<a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_delete') }}?cookiejar_uuid={{cookiejar_uuid}}&cookie_uuid={{cookie_uuid}}">
<i class="fas fa-trash-alt"></i>
</a>
</div>
</div>
</div>
<div class="card-body">
<form action="{{ url_for('crawler_splash.crawler_cookiejar_cookie_edit_post') }}" method="post" enctype="multipart/form-data">
<input type="text" name="cookiejar_uuid" value="{{cookiejar_uuid}}" hidden>
<input type="text" name="cookie_uuid" value="{{cookie_uuid}}" hidden>
{% include 'crawler/crawler_splash/cookie_edit_block.html' %}
<div class="form-group">
<button class="btn btn-info" type="submit" value=Upload><i class="fas fa-cookie-bite"></i> Edit Cookie</button>
</div>
</form>
</div>
</div>
</div>
</div>
</div>
</body>
<script>
$(document).ready(function(){
$('#description-edit-block').hide();
$("#page-crawler").addClass("active");
$("#nav_title_cookiejar").removeClass("text-muted");
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
function show_edit_description(){
console.log('edit');
$('#description-edit-block').show();
}
function edit_description(){
var new_description = $('#input-description').val()
var data_to_send = { cookiejar_uuid: "{{cookiejar_uuid}}", "description": new_description}
$.get("{{ url_for('crawler_splash.crawler_cookiejar_edit') }}", data_to_send, function(data, status){
if(status == "success") {
$('#description-text').text(new_description)
$('#description-edit-block').hide();
}
});
}
</script>
@@ -445,7 +445,7 @@
<div class="text-center">
<small class="text-info" style="line-height:0.9;">
<a target="_blank" href="" id="screenshot_link"></a>
-<small>
+</small>
</div>
</div>
@@ -519,11 +519,11 @@ var draw_img = false;
$("#screenshot_link").attr("href", screenshot_href + "{{dict_domain['crawler_history']['random_item']['id']}}");
$("#screenshot_link").text("{{dict_domain['crawler_history']['random_item']['link']}}");
{%else%}
var screenshot = "";
{%endif%}
{%endif%}
{%else%}
var screenshot = "";
{%endif%}
img.src = base_url + screenshot;
@@ -561,7 +561,9 @@ function img_error() {
}
function reload_image(new_screenshot, link, item_id) {
-$("#"+screenshot.replace(/\//g, "")).removeClass("icon_selected").addClass("icon_img");
+if (screenshot) {
$("#"+screenshot.replace(/\//g, "")).removeClass("icon_selected").addClass("icon_img");
}
screenshot = new_screenshot;
img.src=base_url + screenshot;
@@ -0,0 +1,122 @@
<!DOCTYPE html>
<html>
<head>
<title>AIL - Edit Cookiejar</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card mb-3 mt-1">
<div class="card-header text-white bg-dark">
<div class="row">
<div class="col-8">
<h5 class="card-title"><i class="fas fa-cookie-bite"></i> Edit Cookiejar</h5>
</div>
<div class="col-4">
<a class="btn btn-danger float-right" href="{{ url_for('crawler_splash.crawler_cookiejar_delete') }}?cookiejar_uuid={{cookiejar_uuid}}">
<i class="fas fa-trash-alt"></i>
</a>
</div>
</div>
</div>
<div class="card-body">
{% with all_cookiejar=[cookiejar_metadata], table_id='table_cookiejar'%}
{% include 'crawler/crawler_splash/table_cookiejar.html' %}
{% endwith %}
<button class="btn btn-info" onclick="show_edit_description();">
Edit Description <i class="fas fa-pencil-alt"></i>
</button>
<a href="{{ url_for('crawler_splash.crawler_cookiejar_cookie_add')}}?cookiejar_uuid={{cookiejar_uuid}}">
<button class="btn btn-primary">
Add Cookies <i class="fas fa-cookie"></i>
</button>
</a>
<span class="mt-1" id="description-edit-block">
<div class="input-group">
<input class="form-control" type="text" id="input-description" value="{{cookiejar_metadata['description']}}"></input>
<div class="input-group-append">
<button class="btn btn-info" onclick="edit_description();">
<i class="fas fa-pencil-alt"></i> Edit
</button>
</div>
</div>
</span>
</div>
</div>
{% with l_elem=l_cookies, l_cookie_uuid=l_cookie_uuid, cookiejar_uuid=cookiejar_uuid %}
{% include 'crawler/crawler_splash/cookies_card_block.html' %}
{% endwith %}
</div>
</div>
</div>
</body>
<script>
var chart = {};
$(document).ready(function(){
$('#description-edit-block').hide();
$("#page-crawler").addClass("active");
$("#nav_title_cookiejar").removeClass("text-muted");
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
function show_edit_description(){
console.log('edit');
$('#description-edit-block').show();
}
function edit_description(){
var new_description = $('#input-description').val()
var data_to_send = { cookiejar_uuid: "{{cookiejar_uuid}}", "description": new_description}
$.get("{{ url_for('crawler_splash.crawler_cookiejar_edit') }}", data_to_send, function(data, status){
if(status == "success") {
$('#description-text').text(new_description)
$('#description-edit-block').hide();
}
});
}
</script>
@@ -0,0 +1,28 @@
<table id="{{table_id}}" class="table table-striped table-bordered">
<thead class="bg-dark text-white">
<tr>
<th class="bg-info text-white">Description</th>
<th class="bg-info text-white">Date</th>
<th class="bg-info text-white">UUID</th>
<th class="bg-info text-white">User</th>
</tr>
</thead>
<tbody style="font-size: 15px;">
{% for dict_cookiejar in all_cookiejar %}
<tr>
<td id="description-text">{{dict_cookiejar['description']}}</td>
<td>
{%if dict_cookiejar['date']%}
{{dict_cookiejar['date'][0:4]}}/{{dict_cookiejar['date'][4:6]}}/{{dict_cookiejar['date'][6:8]}}
{%endif%}
</td>
<td>
<a target="_blank" href="{{ url_for('crawler_splash.crawler_cookiejar_show') }}?cookiejar_uuid={{ dict_cookiejar['cookiejar_uuid'] }}">
{{ dict_cookiejar['cookiejar_uuid']}}
</a>
</td>
<td>{{dict_cookiejar['user_id']}}</td>
</tr>
{% endfor %}
</tbody>
</table>
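Each row of this table consumes a cookiejar metadata dict; note that the date is stored as a compact YYYYMMDD string and sliced into YYYY/MM/DD above. A representative entry (values illustrative):

dict_cookiejar = {'cookiejar_uuid': '0b6ff3e2-aaaa-bbbb-cccc-000000000042',
                  'description': 'forum accounts',
                  'date': '20200401',
                  'user_id': 'admin@admin.test'}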
@@ -8,7 +8,7 @@
<nav class="navbar navbar-expand navbar-light bg-light flex-md-column flex-row align-items-start py-2" id="nav_menu">
<h5 class="d-flex text-muted w-100">
<span>Splash Crawlers </span>
-<a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
+<a class="ml-auto" href="{{url_for('crawler_splash.manual')}}">
<i class="fas fa-plus-circle ml-auto"></i>
</a>
</h5>
@@ -32,7 +32,7 @@
</a>
</li>
<li class="nav-item">
-<a class="nav-link" href="{{url_for('hiddenServices.manual')}}" id="nav_manual_crawler">
+<a class="nav-link" href="{{url_for('crawler_splash.manual')}}" id="nav_manual_crawler">
<i class="fas fa-spider"></i>
Manual Crawler
</a>
@@ -47,9 +47,6 @@
<h5 class="d-flex text-muted w-100" id="nav_title_domains_explorer">
<span>Domain Explorer </span>
<a class="ml-auto" href="{{url_for('hiddenServices.manual')}}">
<i class="fas fa-plus-circle ml-auto"></i>
</a>
</h5>
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100"> <!--nav-pills-->
<li class="nav-item">
@@ -64,5 +61,27 @@
<span>Web Domain</span>
</a>
</li>
-</nav>
+</ul>
<h5 class="d-flex text-muted w-100" id="nav_title_cookiejar">
<span>Cookiejar </span>
<a class="ml-auto" href="{{url_for('crawler_splash.crawler_cookiejar_add')}}">
<i class="fas fa-plus-circle ml-auto"></i>
</a>
</h5>
<ul class="nav flex-md-column flex-row navbar-nav justify-content-between w-100">
<li class="nav-item">
<a class="nav-link" href="{{url_for('crawler_splash.crawler_cookiejar_add')}}" id="nav_cookiejar_add">
<i class="fas fa-cookie"></i>
<span>Add Cookiejar</span>
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="{{url_for('crawler_splash.crawler_cookiejar_all')}}" id="nav_cookiejar_all">
<i class="fas fa-cookie-bite"></i>
<span>All Cookiejar</span>
</a>
</li>
</ul>
</nav>
</div>
@@ -165,7 +165,6 @@ $('.add-field').click(function() {
});
$('.fields').on('click', '.delete-field', function(){
console.log($(this).parent());
$(this).parent().remove();
//$.get( "#")
});