new: Config option for Flask IP and Port, reorganize config loading

pull/92/head
Raphaël Vinot 2020-09-21 16:41:30 +02:00
parent af208eccca
commit 7a34095d9c
8 changed files with 93 additions and 68 deletions

View File

@ -4,7 +4,7 @@
import time
import signal
from subprocess import Popen
from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path
from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path, get_config
from redis import StrictRedis
@ -13,10 +13,12 @@ if __name__ == '__main__':
r.delete('cache_loaded')
website_dir = get_homedir() / 'website'
Popen([str(website_dir / '3rdparty.sh')], cwd=website_dir)
ip = get_config('generic', 'website_listen_ip')
port = get_config('generic', 'website_listen_port')
try:
p = Popen(['gunicorn', '-w', '10',
'--graceful-timeout', '2', '--timeout', '300',
'-b', '0.0.0.0:5100',
'-b', f'{ip}:{port}',
'--log-level', 'info',
'web:app'],
cwd=website_dir)

View File

@ -3,6 +3,8 @@
"splash_loglevel": "WARNING",
"only_global_lookups": true,
"public_instance": false,
"website_listen_ip": "0.0.0.0",
"website_listen_port": 5100,
"splash_url": "http://127.0.0.1:8050",
"default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
"cache_clean_user": {},
@ -28,6 +30,8 @@
"loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels",
"splash_loglevel": "(Splash) INFO is *very* verbose.",
"public_instance": "true means disabling features deemed unsafe on a public instance (such as indexing private captures)",
"website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.",
"website_listen_port": "Port Flask will listen on.",
"only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
"splash_url": "URL to connect to splash",
"default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA",

View File

@ -1,13 +1,17 @@
{
"VirusTotal": {
"apikey": "KEY",
"apikey": null,
"autosubmit": false
},
"PhishingInitiative": {
"apikey": "KEY",
"apikey": null,
"autosubmit": false
},
"SaneJS": {
"enabled": true
},
"_notes": {
"apikey": "null disables the module. Pass a string otherwise.",
"autosubmit": "Automatically submits the URL to the 3rd party service."
}
}

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import logging
from typing import List, Optional, Dict, Union, Any
from io import BufferedIOBase
from pathlib import Path
@ -23,6 +24,9 @@ try:
except ImportError:
HAS_CF = False
configs: Dict[str, Dict[str, Any]] = {}
logger = logging.getLogger('Lookyloo - Helpers')
def get_homedir() -> Path:
if not os.environ.get('LOOKYLOO_HOME'):
@ -48,7 +52,10 @@ def get_email_template() -> str:
return f.read()
def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[str, Dict[str, Any]]:
def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
global configs
if configs is not None:
return
if path_to_config_files:
if isinstance(path_to_config_files, str):
config_path = Path(path_to_config_files)
@ -61,11 +68,28 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[
elif not config_path.is_dir():
raise ConfigError(f'Configuration directory {config_path} is not a directory.')
to_return = {}
configs = {}
for path in config_path.glob('*.json'):
with path.open() as _c:
to_return[path.stem] = json.load(_c)
return to_return
configs[path.stem] = json.load(_c)
def get_config(config_type: str, entry: str) -> Any:
    """Return *entry* from the ``<config_type>.json`` config file.

    Falls back to the bundled ``<config_type>.json.sample`` file when the
    entry (or the whole config file) is missing, so a fresh install keeps
    working before the operator initializes the real config.

    :param config_type: stem of the config file, e.g. ``generic`` or ``modules``.
    :param entry: key to look up in that file.
    :raises KeyError: if the entry is missing from the sample file too.
    """
    global configs
    # configs is initialized to {} at module level, so test for emptiness:
    # "if configs is None" can never be true, which means the lazy
    # load_configs() call below would never run and every lookup would
    # silently fall through to the sample file.
    if not configs:
        load_configs()
    if config_type in configs:
        if entry in configs[config_type]:
            return configs[config_type][entry]
        logger.warning(f'Unable to find {entry} in config file.')
    else:
        # Mention which file is missing: this helper is no longer
        # limited to the generic config.
        logger.warning(f'No {config_type} config file available.')
    logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.')
    with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c:
        sample_config = json.load(_c)
    return sample_config[entry]
def safe_create_dir(to_create: Path) -> None:

View File

@ -29,7 +29,7 @@ from scrapysplashwrapper import crawl
from werkzeug.useragents import UserAgent
from .exceptions import NoValidHarFile, MissingUUID
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content
from .helpers import get_homedir, get_socket_path, load_cookies, get_config, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
@ -483,10 +483,9 @@ class Lookyloo():
def __init__(self) -> None:
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.configs: Dict[str, Dict[str, Any]] = load_configs()
self.logger.setLevel(self.get_config('loglevel'))
self.logger.setLevel(get_config('generic', 'loglevel'))
self.indexing = Indexing()
self.is_public_instance = self.get_config('public_instance')
self.is_public_instance = get_config('generic', 'public_instance')
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
self.scrape_dir: Path = get_homedir() / 'scraped'
@ -494,25 +493,21 @@ class Lookyloo():
# In order to have a working default for the docker image, it is easier to use an environment variable
self.splash_url: str = os.environ['SPLASH_URL_DOCKER']
else:
self.splash_url = self.get_config('splash_url')
self.only_global_lookups: bool = self.get_config('only_global_lookups')
self.splash_url = get_config('generic', 'splash_url')
self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
safe_create_dir(self.scrape_dir)
# Initialize 3rd party components
if 'modules' not in self.configs:
self.logger.info('No third party components available in the config directory')
else:
if 'PhishingInitiative' in self.configs['modules']:
self.pi = PhishingInitiative(self.configs['modules']['PhishingInitiative'])
self.pi = PhishingInitiative(get_config('modules', 'PhishingInitiative'))
if not self.pi.available:
self.logger.warning('Unable to setup the PhishingInitiative module')
if 'VirusTotal' in self.configs['modules']:
self.vt = VirusTotal(self.configs['modules']['VirusTotal'])
self.vt = VirusTotal(get_config('modules', 'VirusTotal'))
if not self.vt.available:
self.logger.warning('Unable to setup the VirusTotal module')
if 'SaneJS' in self.configs['modules']:
self.sanejs = SaneJavaScript(self.configs['modules']['SaneJS'])
self.sanejs = SaneJavaScript(get_config('modules', 'SaneJS'))
if not self.sanejs.available:
self.logger.warning('Unable to setup the SaneJS module')
@ -633,20 +628,6 @@ class Lookyloo():
remove_pickle_tree(capture_dir)
self.rebuild_cache()
def get_config(self, entry: str) -> Any:
"""Get an entry from the generic config file. Automatic fallback to the sample file"""
if 'generic' in self.configs:
if entry in self.configs['generic']:
return self.configs['generic'][entry]
else:
self.logger.warning(f'Unable to find {entry} in config file.')
else:
self.logger.warning('No generic config file available.')
self.logger.warning('Falling back on sample config, please initialize the generic config file.')
with (get_homedir() / 'config' / 'generic.json.sample').open() as _c:
sample_config = json.load(_c)
return sample_config[entry]
def get_urlnode_from_tree(self, capture_uuid: str, node_uuid: str) -> URLNode:
capture_dir = self.lookup_capture_dir(capture_uuid)
if not capture_dir:
@ -873,7 +854,7 @@ class Lookyloo():
return False
def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
if not self.get_config('enable_mail_notification'):
if not get_config('generic', 'enable_mail_notification'):
return
redirects = ''
@ -887,7 +868,7 @@ class Lookyloo():
else:
redirects = "No redirects."
email_config = self.get_config('email')
email_config = get_config('generic', 'email')
msg = EmailMessage()
msg['From'] = email_config['from']
if email:
@ -991,15 +972,15 @@ class Lookyloo():
cookies = load_cookies(cookies_pseudofile)
if not user_agent:
# Catch case where the UA is broken on the UI, and the async submission.
ua: str = self.get_config('default_user_agent') # type: ignore
ua: str = get_config('generic', 'default_user_agent') # type: ignore
else:
ua = user_agent
if int(depth) > int(self.get_config('max_depth')): # type: ignore
self.logger.warning(f'Not allowed to scrape on a depth higher than {self.get_config("max_depth")}: {depth}')
depth = int(self.get_config('max_depth')) # type: ignore
if int(depth) > int(get_config('generic', 'max_depth')): # type: ignore
self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}')
depth = int(get_config('generic', 'max_depth')) # type: ignore
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
referer=referer, log_enabled=True, log_level=self.get_config('splash_loglevel'))
referer=referer, log_enabled=True, log_level=get_config('generic', 'splash_loglevel'))
if not items:
# broken
return False

12
poetry.lock generated
View File

@ -690,7 +690,7 @@ description = "Pygments is a syntax highlighting package written in Python."
name = "pygments"
optional = false
python-versions = ">=3.5"
version = "2.7.0"
version = "2.7.1"
[[package]]
category = "main"
@ -887,7 +887,7 @@ description = "Scrapy splash wrapper as a standalone library."
name = "scrapysplashwrapper"
optional = false
python-versions = ">=3.7,<4.0"
version = "1.2.3"
version = "1.2.4"
[package.dependencies]
scrapy = ">=1.8.0,<2.0.0"
@ -1566,8 +1566,8 @@ pyeupi = [
{file = "pyeupi-1.1.tar.gz", hash = "sha256:2309c61ac2ef0eafabd6e9f32a0078069ffbba0e113ebc6b51cffc1869094472"},
]
pygments = [
{file = "Pygments-2.7.0-py3-none-any.whl", hash = "sha256:2df50d16b45b977217e02cba6c8422aaddb859f3d0570a88e09b00eafae89c6e"},
{file = "Pygments-2.7.0.tar.gz", hash = "sha256:2594e8fdb06fef91552f86f4fd3a244d148ab24b66042036e64f29a291515048"},
{file = "Pygments-2.7.1-py3-none-any.whl", hash = "sha256:307543fe65c0947b126e83dd5a61bd8acbd84abec11f43caebaf5534cbc17998"},
{file = "Pygments-2.7.1.tar.gz", hash = "sha256:926c3f319eda178d1bd90851e4317e6d8cdb5e292a3386aac9bd75eca29cf9c7"},
]
pyhamcrest = [
{file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"},
@ -1625,8 +1625,8 @@ scrapy-splash = [
{file = "scrapy_splash-0.7.2-py2.py3-none-any.whl", hash = "sha256:71ac958370f8732fec746a25a8235b03a4d3c4c93a59be51aa8e910a08cfe511"},
]
scrapysplashwrapper = [
{file = "scrapysplashwrapper-1.2.3-py3-none-any.whl", hash = "sha256:527fa816517ac260b029b7c045101493a5cce10b1664e2fc07d723ba26531d7e"},
{file = "scrapysplashwrapper-1.2.3.tar.gz", hash = "sha256:2dc99037a6c72f9d796c6ba57e69abeaebbaca6a13cfca41d9ac79d66ef26c83"},
{file = "scrapysplashwrapper-1.2.4-py3-none-any.whl", hash = "sha256:11930ef076d0fdba66caa6045f8c9c142247cb4eb0ba0939e08306a5b4e879bf"},
{file = "scrapysplashwrapper-1.2.4.tar.gz", hash = "sha256:d1185c760dde1bcda389223545ea8fe16bd9308e52c9baa52f654fb91c7a787f"},
]
service-identity = [
{file = "service_identity-18.1.0-py2.py3-none-any.whl", hash = "sha256:001c0707759cb3de7e49c078a7c0c9cd12594161d3bf06b9c254fdcb1a60dc36"},

View File

@ -13,7 +13,7 @@ from flask import Flask, render_template, request, send_file, redirect, url_for,
from flask_bootstrap import Bootstrap # type: ignore
from flask_httpauth import HTTPDigestAuth # type: ignore
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config
from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID
from .proxied import ReverseProxied
@ -42,11 +42,11 @@ auth = HTTPDigestAuth()
lookyloo: Lookyloo = Lookyloo()
user = lookyloo.get_config('cache_clean_user')
time_delta_on_index = lookyloo.get_config('time_delta_on_index')
blur_screenshot = lookyloo.get_config('enable_default_blur_screenshot')
user = get_config('generic', 'cache_clean_user')
time_delta_on_index = get_config('generic', 'time_delta_on_index')
blur_screenshot = get_config('generic', 'enable_default_blur_screenshot')
logging.basicConfig(level=lookyloo.get_config('loglevel'))
logging.basicConfig(level=get_config('generic', 'loglevel'))
# Method to make sizes in bytes human readable
@ -140,7 +140,7 @@ def scrape_web():
os=request.form.get('os'), browser=request.form.get('browser'))
return redirect(url_for('tree', tree_uuid=perma_uuid))
user_agents: Dict[str, Any] = {}
if lookyloo.get_config('use_user_agents_users'):
if get_config('generic', 'use_user_agents_users'):
lookyloo.build_ua_file()
# NOTE: For now, just generate the file, so we have an idea of the size
# user_agents = get_user_agents('own_user_agents')
@ -191,7 +191,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
keys_request = {
'request_cookie': "/static/cookie_read.png",
}
if lookyloo.get_config('enable_context_by_users'):
if get_config('generic', 'enable_context_by_users'):
enable_context_by_users = True
else:
enable_context_by_users = False
@ -250,9 +250,16 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
if isinstance(posted, bytes):
to_return = BytesIO(posted)
is_blob = True
else:
to_return = BytesIO(posted.encode())
is_blob = False
to_return.seek(0)
if is_blob:
return send_file(to_return, mimetype='application/octet-stream',
as_attachment=True, attachment_filename='posted_data.bin')
else:
return send_file(to_return, mimetype='text/plain',
as_attachment=True, attachment_filename='posted_data.txt')
@ -407,11 +414,11 @@ def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
flash(cache['error'], 'error')
try:
if lookyloo.get_config('enable_mail_notification'):
if get_config('generic', 'enable_mail_notification'):
enable_mail_notification = True
else:
enable_mail_notification = False
if lookyloo.get_config('enable_context_by_users'):
if get_config('generic', 'enable_context_by_users'):
enable_context_by_users = True
else:
enable_context_by_users = False

View File

@ -189,6 +189,9 @@
across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
{{ get_ressource_button(tree_uuid, url['url_object'].uuid, hash, 'Download the embedded ressource') }}
</br>
{% if enable_context_by_users %}
{{ context_form(tree_uuid, url['url_object'].uuid, hostnode_uuid, hash, 'hostnode_popup') }}
{% endif %}
{% if 'other_captures' in details %}
{{ indexed_hash(details['other_captures'], hash) }}