From 7a34095d9c18f2efde8b16c9f1a09d6038ca2786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Mon, 21 Sep 2020 16:41:30 +0200 Subject: [PATCH] new: Config option for Flask IP and Port, reorganize config loading --- bin/start_website.py | 8 +-- config/generic.json.sample | 4 ++ config/modules.json.sample | 8 ++- lookyloo/helpers.py | 32 +++++++++-- lookyloo/lookyloo.py | 65 ++++++++--------------- poetry.lock | 12 ++--- website/web/__init__.py | 29 ++++++---- website/web/templates/hostname_popup.html | 3 ++ 8 files changed, 93 insertions(+), 68 deletions(-) diff --git a/bin/start_website.py b/bin/start_website.py index 54a569b..dd03659 100755 --- a/bin/start_website.py +++ b/bin/start_website.py @@ -4,7 +4,7 @@ import time import signal from subprocess import Popen -from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path +from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path, get_config from redis import StrictRedis @@ -13,10 +13,12 @@ if __name__ == '__main__': r.delete('cache_loaded') website_dir = get_homedir() / 'website' Popen([str(website_dir / '3rdparty.sh')], cwd=website_dir) + ip = get_config('generic', 'website_listen_ip') + port = get_config('generic', 'website_listen_port') try: - p = Popen(['gunicorn','-w', '10', + p = Popen(['gunicorn', '-w', '10', '--graceful-timeout', '2', '--timeout', '300', - '-b', '0.0.0.0:5100', + '-b', f'{ip}:{port}', '--log-level', 'info', 'web:app'], cwd=website_dir) diff --git a/config/generic.json.sample b/config/generic.json.sample index c6fa359..4b5a440 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -3,6 +3,8 @@ "splash_loglevel": "WARNING", "only_global_lookups": true, "public_instance": false, + "website_listen_ip": "0.0.0.0", + "website_listen_port": 5100, "splash_url": "http://127.0.0.1:8050", "default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", "cache_clean_user": {}, @@ -28,6 +30,8 @@ "loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels", "splash_loglevel": "(Splash) INFO is *very* verbose.", "public_instance": "true means disabling features deemed unsafe on a public instance (such as indexing private captures)", + "website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.", + "website_listen_port": "Port Flask will listen on.", "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network", "splash_url": "URL to connect to splash", "default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA", diff --git a/config/modules.json.sample b/config/modules.json.sample index b1d452e..4753cfb 100644 --- a/config/modules.json.sample +++ b/config/modules.json.sample @@ -1,13 +1,17 @@ { "VirusTotal": { - "apikey": "KEY", + "apikey": null, "autosubmit": false }, "PhishingInitiative": { - "apikey": "KEY", + "apikey": null, "autosubmit": false }, "SaneJS": { "enabled": true + }, + "_notes": { + "apikey": "null disables the module. Pass a string otherwise.", + "autosubmit": "Automatically submits the URL to the 3rd party service." 
} } diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index 247455e..de6fab1 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- import os +import logging from typing import List, Optional, Dict, Union, Any from io import BufferedIOBase from pathlib import Path @@ -23,6 +24,9 @@ try: except ImportError: HAS_CF = False +configs: Dict[str, Dict[str, Any]] = {} +logger = logging.getLogger('Lookyloo - Helpers') + def get_homedir() -> Path: if not os.environ.get('LOOKYLOO_HOME'): @@ -48,7 +52,10 @@ def get_email_template() -> str: return f.read() -def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[str, Dict[str, Any]]: +def load_configs(path_to_config_files: Optional[Union[str, Path]]=None): + global configs + if configs: + return if path_to_config_files: if isinstance(path_to_config_files, str): config_path = Path(path_to_config_files) @@ -61,11 +68,28 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[ elif not config_path.is_dir(): raise ConfigError(f'Configuration directory {config_path} is not a directory.') - to_return = {} + configs = {} for path in config_path.glob('*.json'): with path.open() as _c: - to_return[path.stem] = json.load(_c) - return to_return + configs[path.stem] = json.load(_c) + + +def get_config(config_type: str, entry: str) -> Any: + """Get an entry from the given config_type file. 
Automatic fallback to the sample file""" + global configs + if not configs: + load_configs() + if config_type in configs: + if entry in configs[config_type]: + return configs[config_type][entry] + else: + logger.warning(f'Unable to find {entry} in config file.') + else: + logger.warning(f'No {config_type} config file available.') + logger.warning('Falling back on sample config, please initialize the generic config file.') + with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c: + sample_config = json.load(_c) + return sample_config[entry] def safe_create_dir(to_create: Path) -> None: diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 9c750d2..31c76a8 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -29,7 +29,7 @@ from scrapysplashwrapper import crawl from werkzeug.useragents import UserAgent from .exceptions import NoValidHarFile, MissingUUID -from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content +from .helpers import get_homedir, get_socket_path, load_cookies, get_config, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content from .modules import VirusTotal, SaneJavaScript, PhishingInitiative @@ -483,10 +483,9 @@ class Lookyloo(): def __init__(self) -> None: self.logger = logging.getLogger(f'{self.__class__.__name__}') - self.configs: Dict[str, Dict[str, Any]] = load_configs() - self.logger.setLevel(self.get_config('loglevel')) + self.logger.setLevel(get_config('generic', 'loglevel')) self.indexing = Indexing() - self.is_public_instance = self.get_config('public_instance') + self.is_public_instance = get_config('generic', 'public_instance') self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True) self.scrape_dir: Path = get_homedir() / 'scraped' @@ -494,27 +493,23 @@ class Lookyloo(): # In order to have a working default for the
docker image, it is easier to use an environment variable self.splash_url: str = os.environ['SPLASH_URL_DOCKER'] else: - self.splash_url = self.get_config('splash_url') - self.only_global_lookups: bool = self.get_config('only_global_lookups') + self.splash_url = get_config('generic', 'splash_url') + self.only_global_lookups: bool = get_config('generic', 'only_global_lookups') safe_create_dir(self.scrape_dir) # Initialize 3rd party components - if 'modules' not in self.configs: - self.logger.info('No third party components available in the config directory') - else: - if 'PhishingInitiative' in self.configs['modules']: - self.pi = PhishingInitiative(self.configs['modules']['PhishingInitiative']) - if not self.pi.available: - self.logger.warning('Unable to setup the PhishingInitiative module') - if 'VirusTotal' in self.configs['modules']: - self.vt = VirusTotal(self.configs['modules']['VirusTotal']) - if not self.vt.available: - self.logger.warning('Unable to setup the VirusTotal module') - if 'SaneJS' in self.configs['modules']: - self.sanejs = SaneJavaScript(self.configs['modules']['SaneJS']) - if not self.sanejs.available: - self.logger.warning('Unable to setup the SaneJS module') + self.pi = PhishingInitiative(get_config('modules', 'PhishingInitiative')) + if not self.pi.available: + self.logger.warning('Unable to setup the PhishingInitiative module') + + self.vt = VirusTotal(get_config('modules', 'VirusTotal')) + if not self.vt.available: + self.logger.warning('Unable to setup the VirusTotal module') + + self.sanejs = SaneJavaScript(get_config('modules', 'SaneJS')) + if not self.sanejs.available: + self.logger.warning('Unable to setup the SaneJS module') if hasattr(self, 'sanejs') and self.sanejs.available: self.context = Context(self.sanejs) @@ -633,20 +628,6 @@ class Lookyloo(): remove_pickle_tree(capture_dir) self.rebuild_cache() - def get_config(self, entry: str) -> Any: - """Get an entry from the generic config file. 
Automatic fallback to the sample file""" - if 'generic' in self.configs: - if entry in self.configs['generic']: - return self.configs['generic'][entry] - else: - self.logger.warning(f'Unable to find {entry} in config file.') - else: - self.logger.warning('No generic config file available.') - self.logger.warning('Falling back on sample config, please initialize the generic config file.') - with (get_homedir() / 'config' / 'generic.json.sample').open() as _c: - sample_config = json.load(_c) - return sample_config[entry] - def get_urlnode_from_tree(self, capture_uuid: str, node_uuid: str) -> URLNode: capture_dir = self.lookup_capture_dir(capture_uuid) if not capture_dir: @@ -873,7 +854,7 @@ class Lookyloo(): return False def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None: - if not self.get_config('enable_mail_notification'): + if not get_config('generic', 'enable_mail_notification'): return redirects = '' @@ -887,7 +868,7 @@ class Lookyloo(): else: redirects = "No redirects." - email_config = self.get_config('email') + email_config = get_config('generic', 'email') msg = EmailMessage() msg['From'] = email_config['from'] if email: @@ -991,15 +972,15 @@ class Lookyloo(): cookies = load_cookies(cookies_pseudofile) if not user_agent: # Catch case where the UA is broken on the UI, and the async submission. 
- ua: str = self.get_config('default_user_agent') # type: ignore + ua: str = get_config('generic', 'default_user_agent') # type: ignore else: ua = user_agent - if int(depth) > int(self.get_config('max_depth')): # type: ignore - self.logger.warning(f'Not allowed to scrape on a depth higher than {self.get_config("max_depth")}: {depth}') - depth = int(self.get_config('max_depth')) # type: ignore + if int(depth) > int(get_config('generic', 'max_depth')): # type: ignore + self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}') + depth = int(get_config('generic', 'max_depth')) # type: ignore items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua, - referer=referer, log_enabled=True, log_level=self.get_config('splash_loglevel')) + referer=referer, log_enabled=True, log_level=get_config('generic', 'splash_loglevel')) if not items: # broken return False diff --git a/poetry.lock b/poetry.lock index 9e12faf..ad8531f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -690,7 +690,7 @@ description = "Pygments is a syntax highlighting package written in Python." name = "pygments" optional = false python-versions = ">=3.5" -version = "2.7.0" +version = "2.7.1" [[package]] category = "main" @@ -887,7 +887,7 @@ description = "Scrapy splash wrapper as a standalone library." 
name = "scrapysplashwrapper" optional = false python-versions = ">=3.7,<4.0" -version = "1.2.3" +version = "1.2.4" [package.dependencies] scrapy = ">=1.8.0,<2.0.0" @@ -1566,8 +1566,8 @@ pyeupi = [ {file = "pyeupi-1.1.tar.gz", hash = "sha256:2309c61ac2ef0eafabd6e9f32a0078069ffbba0e113ebc6b51cffc1869094472"}, ] pygments = [ - {file = "Pygments-2.7.0-py3-none-any.whl", hash = "sha256:2df50d16b45b977217e02cba6c8422aaddb859f3d0570a88e09b00eafae89c6e"}, - {file = "Pygments-2.7.0.tar.gz", hash = "sha256:2594e8fdb06fef91552f86f4fd3a244d148ab24b66042036e64f29a291515048"}, + {file = "Pygments-2.7.1-py3-none-any.whl", hash = "sha256:307543fe65c0947b126e83dd5a61bd8acbd84abec11f43caebaf5534cbc17998"}, + {file = "Pygments-2.7.1.tar.gz", hash = "sha256:926c3f319eda178d1bd90851e4317e6d8cdb5e292a3386aac9bd75eca29cf9c7"}, ] pyhamcrest = [ {file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"}, @@ -1625,8 +1625,8 @@ scrapy-splash = [ {file = "scrapy_splash-0.7.2-py2.py3-none-any.whl", hash = "sha256:71ac958370f8732fec746a25a8235b03a4d3c4c93a59be51aa8e910a08cfe511"}, ] scrapysplashwrapper = [ - {file = "scrapysplashwrapper-1.2.3-py3-none-any.whl", hash = "sha256:527fa816517ac260b029b7c045101493a5cce10b1664e2fc07d723ba26531d7e"}, - {file = "scrapysplashwrapper-1.2.3.tar.gz", hash = "sha256:2dc99037a6c72f9d796c6ba57e69abeaebbaca6a13cfca41d9ac79d66ef26c83"}, + {file = "scrapysplashwrapper-1.2.4-py3-none-any.whl", hash = "sha256:11930ef076d0fdba66caa6045f8c9c142247cb4eb0ba0939e08306a5b4e879bf"}, + {file = "scrapysplashwrapper-1.2.4.tar.gz", hash = "sha256:d1185c760dde1bcda389223545ea8fe16bd9308e52c9baa52f654fb91c7a787f"}, ] service-identity = [ {file = "service_identity-18.1.0-py2.py3-none-any.whl", hash = "sha256:001c0707759cb3de7e49c078a7c0c9cd12594161d3bf06b9c254fdcb1a60dc36"}, diff --git a/website/web/__init__.py b/website/web/__init__.py index 5ec390a..a6f6467 100644 --- a/website/web/__init__.py +++ 
b/website/web/__init__.py @@ -13,7 +13,7 @@ from flask import Flask, render_template, request, send_file, redirect, url_for, from flask_bootstrap import Bootstrap # type: ignore from flask_httpauth import HTTPDigestAuth # type: ignore -from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents +from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config from lookyloo.lookyloo import Lookyloo, Indexing from lookyloo.exceptions import NoValidHarFile, MissingUUID from .proxied import ReverseProxied @@ -42,11 +42,11 @@ auth = HTTPDigestAuth() lookyloo: Lookyloo = Lookyloo() -user = lookyloo.get_config('cache_clean_user') -time_delta_on_index = lookyloo.get_config('time_delta_on_index') -blur_screenshot = lookyloo.get_config('enable_default_blur_screenshot') +user = get_config('generic', 'cache_clean_user') +time_delta_on_index = get_config('generic', 'time_delta_on_index') +blur_screenshot = get_config('generic', 'enable_default_blur_screenshot') -logging.basicConfig(level=lookyloo.get_config('loglevel')) +logging.basicConfig(level=get_config('generic', 'loglevel')) # Method to make sizes in bytes human readable @@ -140,7 +140,7 @@ def scrape_web(): os=request.form.get('os'), browser=request.form.get('browser')) return redirect(url_for('tree', tree_uuid=perma_uuid)) user_agents: Dict[str, Any] = {} - if lookyloo.get_config('use_user_agents_users'): + if get_config('generic', 'use_user_agents_users'): lookyloo.build_ua_file() # NOTE: For now, just generate the file, so we have an idea of the size # user_agents = get_user_agents('own_user_agents') @@ -191,7 +191,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str): keys_request = { 'request_cookie': "/static/cookie_read.png", } - if lookyloo.get_config('enable_context_by_users'): + if get_config('generic', 'enable_context_by_users'): enable_context_by_users = True else: enable_context_by_users = False @@ -250,11 +250,18 @@ def urlnode_post_request(tree_uuid: str, 
node_uuid: str): if isinstance(posted, bytes): to_return = BytesIO(posted) + is_blob = True else: to_return = BytesIO(posted.encode()) + is_blob = False to_return.seek(0) - return send_file(to_return, mimetype='text/plain', - as_attachment=True, attachment_filename='posted_data.txt') + + if is_blob: + return send_file(to_return, mimetype='application/octet-stream', + as_attachment=True, attachment_filename='posted_data.bin') + else: + return send_file(to_return, mimetype='text/plain', + as_attachment=True, attachment_filename='posted_data.txt') @app.route('/tree//url//ressource', methods=['POST', 'GET']) @@ -407,11 +414,11 @@ def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None): flash(cache['error'], 'error') try: - if lookyloo.get_config('enable_mail_notification'): + if get_config('generic', 'enable_mail_notification'): enable_mail_notification = True else: enable_mail_notification = False - if lookyloo.get_config('enable_context_by_users'): + if get_config('generic', 'enable_context_by_users'): enable_context_by_users = True else: enable_context_by_users = False diff --git a/website/web/templates/hostname_popup.html b/website/web/templates/hostname_popup.html index 1e05cbf..cd7a910 100644 --- a/website/web/templates/hostname_popup.html +++ b/website/web/templates/hostname_popup.html @@ -189,6 +189,9 @@ across all the captures on this lookyloo instance, in {{ details['hash_domains_freq'] }} unique domains. {{ get_ressource_button(tree_uuid, url['url_object'].uuid, hash, 'Download the embedded ressource') }}
+ {% if enable_context_by_users %} + {{ context_form(tree_uuid, url['url_object'].uuid, hostnode_uuid, hash, 'hostnode_popup') }} + {% endif %} {% if 'other_captures' in details %} {{ indexed_hash(details['other_captures'], hash) }}