new: Config option for Flask IP and Port, reorganize config loading

pull/92/head
Raphaël Vinot 2020-09-21 16:41:30 +02:00
parent af208eccca
commit 7a34095d9c
8 changed files with 93 additions and 68 deletions

bin/start_website.py

@@ -4,7 +4,7 @@
 import time
 import signal
 from subprocess import Popen
-from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path
+from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path, get_config
 from redis import StrictRedis
@@ -13,10 +13,12 @@ if __name__ == '__main__':
     r.delete('cache_loaded')
     website_dir = get_homedir() / 'website'
     Popen([str(website_dir / '3rdparty.sh')], cwd=website_dir)
+    ip = get_config('generic', 'website_listen_ip')
+    port = get_config('generic', 'website_listen_port')
     try:
-        p = Popen(['gunicorn','-w', '10',
+        p = Popen(['gunicorn', '-w', '10',
                    '--graceful-timeout', '2', '--timeout', '300',
-                   '-b', '0.0.0.0:5100',
+                   '-b', f'{ip}:{port}',
                    '--log-level', 'info',
                    'web:app'],
                   cwd=website_dir)
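
The net effect: the bind address is no longer hard-coded. A minimal usage sketch for checking what the script will use, assuming a standard checkout with LOOKYLOO_HOME set (get_config and its lazy loading are introduced in the lookyloo/helpers.py hunk below):

# Hedged sketch: print the address gunicorn will bind to.
from lookyloo.helpers import get_config

ip = get_config('generic', 'website_listen_ip')      # "0.0.0.0" in the sample config
port = get_config('generic', 'website_listen_port')  # 5100 in the sample config
print(f'gunicorn will bind to {ip}:{port}')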

config/generic.json.sample

@@ -3,6 +3,8 @@
     "splash_loglevel": "WARNING",
     "only_global_lookups": true,
     "public_instance": false,
+    "website_listen_ip": "0.0.0.0",
+    "website_listen_port": 5100,
     "splash_url": "http://127.0.0.1:8050",
     "default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
     "cache_clean_user": {},
@@ -28,6 +30,8 @@
     "loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels",
     "splash_loglevel": "(Splash) INFO is *very* verbose.",
     "public_instance": "true means disabling features deemed unsafe on a public instance (such as indexing private captures)",
+    "website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.",
+    "website_listen_port": "Port Flask will listen on.",
     "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
     "splash_url": "URL to connect to splash",
     "default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA",

config/modules.json.sample

@ -1,13 +1,17 @@
{ {
"VirusTotal": { "VirusTotal": {
"apikey": "KEY", "apikey": null,
"autosubmit": false "autosubmit": false
}, },
"PhishingInitiative": { "PhishingInitiative": {
"apikey": "KEY", "apikey": null,
"autosubmit": false "autosubmit": false
}, },
"SaneJS": { "SaneJS": {
"enabled": true "enabled": true
},
"_notes": {
"apikey": "null disables the module. Pass a string otherwise.",
"autosubmit": "Automatically submits the URL to the 3rd party service."
} }
} }
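
Per the new _notes entry, a null apikey now disables a module. A hedged sketch of the pattern this implies; the real module classes live in lookyloo/modules.py, which this commit does not touch, so the names below are illustrative only:

from lookyloo.helpers import get_config

vt_config = get_config('modules', 'VirusTotal')
if not vt_config.get('apikey'):  # null in JSON -> None in Python -> module disabled
    available = False
else:
    available = True
    autosubmit = bool(vt_config.get('autosubmit', False))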

lookyloo/helpers.py

@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import os
+import logging
 from typing import List, Optional, Dict, Union, Any
 from io import BufferedIOBase
 from pathlib import Path
@@ -23,6 +24,9 @@ try:
 except ImportError:
     HAS_CF = False
 
+configs: Dict[str, Dict[str, Any]] = {}
+logger = logging.getLogger('Lookyloo - Helpers')
+
 
 def get_homedir() -> Path:
     if not os.environ.get('LOOKYLOO_HOME'):
@@ -48,7 +52,10 @@ def get_email_template() -> str:
         return f.read()
 
 
-def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[str, Dict[str, Any]]:
+def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
+    global configs
+    if configs:
+        return
     if path_to_config_files:
         if isinstance(path_to_config_files, str):
             config_path = Path(path_to_config_files)
@@ -61,11 +68,28 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[
     elif not config_path.is_dir():
         raise ConfigError(f'Configuration directory {config_path} is not a directory.')
 
-    to_return = {}
+    configs = {}
     for path in config_path.glob('*.json'):
         with path.open() as _c:
-            to_return[path.stem] = json.load(_c)
-    return to_return
+            configs[path.stem] = json.load(_c)
+
+
+def get_config(config_type: str, entry: str) -> Any:
+    """Get an entry from the given config_type file. Automatic fallback to the sample file"""
+    global configs
+    if not configs:
+        load_configs()
+    if config_type in configs:
+        if entry in configs[config_type]:
+            return configs[config_type][entry]
+        else:
+            logger.warning(f'Unable to find {entry} in config file.')
+    else:
+        logger.warning(f'No {config_type} config file available.')
+    logger.warning('Falling back on sample config, please initialize the generic config file.')
+    with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c:
+        sample_config = json.load(_c)
+    return sample_config[entry]
 
 
 def safe_create_dir(to_create: Path) -> None:
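
Usage sketch for the two helpers: get_config lazy-loads every config/*.json file on first call and falls back to the matching *.json.sample (with a warning) when the file or key is missing. Only get_config's behaviour is from this commit; the print is illustrative:

from lookyloo.helpers import get_config, load_configs

load_configs()  # optional: get_config() triggers it on first use anyway

# Reads config/generic.json; falls back to config/generic.json.sample
# if the file or the key is absent.
loglevel = get_config('generic', 'loglevel')
print(loglevel)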

lookyloo/lookyloo.py

@@ -29,7 +29,7 @@ from scrapysplashwrapper import crawl
 from werkzeug.useragents import UserAgent
 
 from .exceptions import NoValidHarFile, MissingUUID
-from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content
+from .helpers import get_homedir, get_socket_path, load_cookies, get_config, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content
 from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
@@ -483,10 +483,9 @@ class Lookyloo():
 
     def __init__(self) -> None:
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
-        self.configs: Dict[str, Dict[str, Any]] = load_configs()
-        self.logger.setLevel(self.get_config('loglevel'))
+        self.logger.setLevel(get_config('generic', 'loglevel'))
         self.indexing = Indexing()
-        self.is_public_instance = self.get_config('public_instance')
+        self.is_public_instance = get_config('generic', 'public_instance')
         self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
         self.scrape_dir: Path = get_homedir() / 'scraped'
@@ -494,27 +493,23 @@ class Lookyloo():
             # In order to have a working default for the docker image, it is easier to use an environment variable
             self.splash_url: str = os.environ['SPLASH_URL_DOCKER']
         else:
-            self.splash_url = self.get_config('splash_url')
-        self.only_global_lookups: bool = self.get_config('only_global_lookups')
+            self.splash_url = get_config('generic', 'splash_url')
+        self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
         safe_create_dir(self.scrape_dir)
 
         # Initialize 3rd party components
-        if 'modules' not in self.configs:
-            self.logger.info('No third party components available in the config directory')
-        else:
-            if 'PhishingInitiative' in self.configs['modules']:
-                self.pi = PhishingInitiative(self.configs['modules']['PhishingInitiative'])
-                if not self.pi.available:
-                    self.logger.warning('Unable to setup the PhishingInitiative module')
-            if 'VirusTotal' in self.configs['modules']:
-                self.vt = VirusTotal(self.configs['modules']['VirusTotal'])
-                if not self.vt.available:
-                    self.logger.warning('Unable to setup the VirusTotal module')
-            if 'SaneJS' in self.configs['modules']:
-                self.sanejs = SaneJavaScript(self.configs['modules']['SaneJS'])
-                if not self.sanejs.available:
-                    self.logger.warning('Unable to setup the SaneJS module')
+        self.pi = PhishingInitiative(get_config('modules', 'PhishingInitiative'))
+        if not self.pi.available:
+            self.logger.warning('Unable to setup the PhishingInitiative module')
+
+        self.vt = VirusTotal(get_config('modules', 'VirusTotal'))
+        if not self.vt.available:
+            self.logger.warning('Unable to setup the VirusTotal module')
+
+        self.sanejs = SaneJavaScript(get_config('modules', 'SaneJS'))
+        if not self.sanejs.available:
+            self.logger.warning('Unable to setup the SaneJS module')
 
         if hasattr(self, 'sanejs') and self.sanejs.available:
             self.context = Context(self.sanejs)
@@ -633,20 +628,6 @@ class Lookyloo():
         remove_pickle_tree(capture_dir)
         self.rebuild_cache()
 
-    def get_config(self, entry: str) -> Any:
-        """Get an entry from the generic config file. Automatic fallback to the sample file"""
-        if 'generic' in self.configs:
-            if entry in self.configs['generic']:
-                return self.configs['generic'][entry]
-            else:
-                self.logger.warning(f'Unable to find {entry} in config file.')
-        else:
-            self.logger.warning('No generic config file available.')
-        self.logger.warning('Falling back on sample config, please initialize the generic config file.')
-        with (get_homedir() / 'config' / 'generic.json.sample').open() as _c:
-            sample_config = json.load(_c)
-        return sample_config[entry]
-
     def get_urlnode_from_tree(self, capture_uuid: str, node_uuid: str) -> URLNode:
         capture_dir = self.lookup_capture_dir(capture_uuid)
         if not capture_dir:
@@ -873,7 +854,7 @@ class Lookyloo():
             return False
 
     def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
-        if not self.get_config('enable_mail_notification'):
+        if not get_config('generic', 'enable_mail_notification'):
            return
 
         redirects = ''
@@ -887,7 +868,7 @@ class Lookyloo():
         else:
             redirects = "No redirects."
 
-        email_config = self.get_config('email')
+        email_config = get_config('generic', 'email')
         msg = EmailMessage()
         msg['From'] = email_config['from']
         if email:
@@ -991,15 +972,15 @@ class Lookyloo():
         cookies = load_cookies(cookies_pseudofile)
         if not user_agent:
             # Catch case where the UA is broken on the UI, and the async submission.
-            ua: str = self.get_config('default_user_agent')  # type: ignore
+            ua: str = get_config('generic', 'default_user_agent')  # type: ignore
         else:
             ua = user_agent
 
-        if int(depth) > int(self.get_config('max_depth')):  # type: ignore
-            self.logger.warning(f'Not allowed to scrape on a depth higher than {self.get_config("max_depth")}: {depth}')
-            depth = int(self.get_config('max_depth'))  # type: ignore
+        if int(depth) > int(get_config('generic', 'max_depth')):  # type: ignore
+            self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}')
+            depth = int(get_config('generic', 'max_depth'))  # type: ignore
         items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
-                      referer=referer, log_enabled=True, log_level=self.get_config('splash_loglevel'))
+                      referer=referer, log_enabled=True, log_level=get_config('generic', 'splash_loglevel'))
         if not items:
             # broken
             return False
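
For callers, the migration away from the removed Lookyloo.get_config method is mechanical. A before/after sketch, using the max_depth lookup above as the instance:

# Before: config access went through the Lookyloo instance.
#     depth = int(self.get_config('max_depth'))
# After: a module-level helper, with the config file named explicitly.
from lookyloo.helpers import get_config

depth = int(get_config('generic', 'max_depth'))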

poetry.lock (generated)

@@ -690,7 +690,7 @@ description = "Pygments is a syntax highlighting package written in Python."
 name = "pygments"
 optional = false
 python-versions = ">=3.5"
-version = "2.7.0"
+version = "2.7.1"
 
 [[package]]
 category = "main"
@@ -887,7 +887,7 @@ description = "Scrapy splash wrapper as a standalone library."
 name = "scrapysplashwrapper"
 optional = false
 python-versions = ">=3.7,<4.0"
-version = "1.2.3"
+version = "1.2.4"
 
 [package.dependencies]
 scrapy = ">=1.8.0,<2.0.0"
@@ -1566,8 +1566,8 @@ pyeupi = [
     {file = "pyeupi-1.1.tar.gz", hash = "sha256:2309c61ac2ef0eafabd6e9f32a0078069ffbba0e113ebc6b51cffc1869094472"},
 ]
 pygments = [
-    {file = "Pygments-2.7.0-py3-none-any.whl", hash = "sha256:2df50d16b45b977217e02cba6c8422aaddb859f3d0570a88e09b00eafae89c6e"},
-    {file = "Pygments-2.7.0.tar.gz", hash = "sha256:2594e8fdb06fef91552f86f4fd3a244d148ab24b66042036e64f29a291515048"},
+    {file = "Pygments-2.7.1-py3-none-any.whl", hash = "sha256:307543fe65c0947b126e83dd5a61bd8acbd84abec11f43caebaf5534cbc17998"},
+    {file = "Pygments-2.7.1.tar.gz", hash = "sha256:926c3f319eda178d1bd90851e4317e6d8cdb5e292a3386aac9bd75eca29cf9c7"},
 ]
 pyhamcrest = [
     {file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"},
@@ -1625,8 +1625,8 @@ scrapy-splash = [
     {file = "scrapy_splash-0.7.2-py2.py3-none-any.whl", hash = "sha256:71ac958370f8732fec746a25a8235b03a4d3c4c93a59be51aa8e910a08cfe511"},
 ]
 scrapysplashwrapper = [
-    {file = "scrapysplashwrapper-1.2.3-py3-none-any.whl", hash = "sha256:527fa816517ac260b029b7c045101493a5cce10b1664e2fc07d723ba26531d7e"},
-    {file = "scrapysplashwrapper-1.2.3.tar.gz", hash = "sha256:2dc99037a6c72f9d796c6ba57e69abeaebbaca6a13cfca41d9ac79d66ef26c83"},
+    {file = "scrapysplashwrapper-1.2.4-py3-none-any.whl", hash = "sha256:11930ef076d0fdba66caa6045f8c9c142247cb4eb0ba0939e08306a5b4e879bf"},
+    {file = "scrapysplashwrapper-1.2.4.tar.gz", hash = "sha256:d1185c760dde1bcda389223545ea8fe16bd9308e52c9baa52f654fb91c7a787f"},
 ]
 service-identity = [
     {file = "service_identity-18.1.0-py2.py3-none-any.whl", hash = "sha256:001c0707759cb3de7e49c078a7c0c9cd12594161d3bf06b9c254fdcb1a60dc36"},

website/web/__init__.py

@@ -13,7 +13,7 @@ from flask import Flask, render_template, request, send_file, redirect, url_for,
 from flask_bootstrap import Bootstrap  # type: ignore
 from flask_httpauth import HTTPDigestAuth  # type: ignore
 
-from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
+from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config
 from lookyloo.lookyloo import Lookyloo, Indexing
 from lookyloo.exceptions import NoValidHarFile, MissingUUID
 from .proxied import ReverseProxied
@@ -42,11 +42,11 @@ auth = HTTPDigestAuth()
 
 lookyloo: Lookyloo = Lookyloo()
 
-user = lookyloo.get_config('cache_clean_user')
-time_delta_on_index = lookyloo.get_config('time_delta_on_index')
-blur_screenshot = lookyloo.get_config('enable_default_blur_screenshot')
+user = get_config('generic', 'cache_clean_user')
+time_delta_on_index = get_config('generic', 'time_delta_on_index')
+blur_screenshot = get_config('generic', 'enable_default_blur_screenshot')
 
-logging.basicConfig(level=lookyloo.get_config('loglevel'))
+logging.basicConfig(level=get_config('generic', 'loglevel'))
 
 # Method to make sizes in bytes human readable
@@ -140,7 +140,7 @@ def scrape_web():
                               os=request.form.get('os'), browser=request.form.get('browser'))
         return redirect(url_for('tree', tree_uuid=perma_uuid))
     user_agents: Dict[str, Any] = {}
-    if lookyloo.get_config('use_user_agents_users'):
+    if get_config('generic', 'use_user_agents_users'):
         lookyloo.build_ua_file()
         # NOTE: For now, just generate the file, so we have an idea of the size
         # user_agents = get_user_agents('own_user_agents')
@@ -191,7 +191,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
     keys_request = {
         'request_cookie': "/static/cookie_read.png",
     }
-    if lookyloo.get_config('enable_context_by_users'):
+    if get_config('generic', 'enable_context_by_users'):
         enable_context_by_users = True
     else:
         enable_context_by_users = False
@@ -250,11 +250,18 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
     if isinstance(posted, bytes):
         to_return = BytesIO(posted)
+        is_blob = True
     else:
         to_return = BytesIO(posted.encode())
+        is_blob = False
     to_return.seek(0)
-    return send_file(to_return, mimetype='text/plain',
-                     as_attachment=True, attachment_filename='posted_data.txt')
+
+    if is_blob:
+        return send_file(to_return, mimetype='application/octet-stream',
+                         as_attachment=True, attachment_filename='posted_data.bin')
+    else:
+        return send_file(to_return, mimetype='text/plain',
+                         as_attachment=True, attachment_filename='posted_data.txt')
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
@@ -407,11 +414,11 @@ def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
             flash(cache['error'], 'error')
 
     try:
-        if lookyloo.get_config('enable_mail_notification'):
+        if get_config('generic', 'enable_mail_notification'):
             enable_mail_notification = True
         else:
             enable_mail_notification = False
-        if lookyloo.get_config('enable_context_by_users'):
+        if get_config('generic', 'enable_context_by_users'):
             enable_context_by_users = True
         else:
             enable_context_by_users = False
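
The posted-data change above keys the MIME type and filename off the payload type. An equivalent compact formulation, as a sketch only (attachment_filename is the Flask 1.x parameter name used in the diff; _serve_posted is a hypothetical helper, not part of this commit):

from io import BytesIO
from flask import send_file

def _serve_posted(posted):
    # bytes -> binary attachment, str -> text attachment
    if isinstance(posted, bytes):
        data, mimetype, name = posted, 'application/octet-stream', 'posted_data.bin'
    else:
        data, mimetype, name = posted.encode(), 'text/plain', 'posted_data.txt'
    to_return = BytesIO(data)
    to_return.seek(0)
    return send_file(to_return, mimetype=mimetype,
                     as_attachment=True, attachment_filename=name)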

website/web/templates/hostnode_popup.html

@@ -189,6 +189,9 @@
                 across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
                 {{ get_ressource_button(tree_uuid, url['url_object'].uuid, hash, 'Download the embedded ressource') }}
                 </br>
+                {% if enable_context_by_users %}
+                    {{ context_form(tree_uuid, url['url_object'].uuid, hostnode_uuid, hash, 'hostnode_popup') }}
+                {% endif %}
                 {% if 'other_captures' in details %}
                     {{ indexed_hash(details['other_captures'], hash) }}