mirror of https://github.com/CIRCL/lookyloo
new: Config option for Flask IP and Port, reorganize config loading
parent
af208eccca
commit
7a34095d9c
|
@ -4,7 +4,7 @@
|
|||
import time
|
||||
import signal
|
||||
from subprocess import Popen
|
||||
from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path
|
||||
from lookyloo.helpers import get_homedir, shutdown_requested, set_running, unset_running, get_socket_path, get_config
|
||||
from redis import StrictRedis
|
||||
|
||||
|
||||
|
@ -13,10 +13,12 @@ if __name__ == '__main__':
|
|||
r.delete('cache_loaded')
|
||||
website_dir = get_homedir() / 'website'
|
||||
Popen([str(website_dir / '3rdparty.sh')], cwd=website_dir)
|
||||
ip = get_config('generic', 'website_listen_ip')
|
||||
port = get_config('generic', 'website_listen_port')
|
||||
try:
|
||||
p = Popen(['gunicorn','-w', '10',
|
||||
p = Popen(['gunicorn', '-w', '10',
|
||||
'--graceful-timeout', '2', '--timeout', '300',
|
||||
'-b', '0.0.0.0:5100',
|
||||
'-b', f'{ip}:{port}',
|
||||
'--log-level', 'info',
|
||||
'web:app'],
|
||||
cwd=website_dir)
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
"splash_loglevel": "WARNING",
|
||||
"only_global_lookups": true,
|
||||
"public_instance": false,
|
||||
"website_listen_ip": "0.0.0.0",
|
||||
"website_listen_port": 5100,
|
||||
"splash_url": "http://127.0.0.1:8050",
|
||||
"default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
|
||||
"cache_clean_user": {},
|
||||
|
@ -28,6 +30,8 @@
|
|||
"loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels",
|
||||
"splash_loglevel": "(Splash) INFO is *very* verbose.",
|
||||
"public_instance": "true means disabling features deemed unsafe on a public instance (such as indexing private captures)",
|
||||
"website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.",
|
||||
"website_listen_port": "Port Flask will listen on.",
|
||||
"only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
|
||||
"splash_url": "URL to connect to splash",
|
||||
"default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA",
|
||||
|
|
|
@ -1,13 +1,17 @@
|
|||
{
|
||||
"VirusTotal": {
|
||||
"apikey": "KEY",
|
||||
"apikey": null,
|
||||
"autosubmit": false
|
||||
},
|
||||
"PhishingInitiative": {
|
||||
"apikey": "KEY",
|
||||
"apikey": null,
|
||||
"autosubmit": false
|
||||
},
|
||||
"SaneJS": {
|
||||
"enabled": true
|
||||
},
|
||||
"_notes": {
|
||||
"apikey": "null disables the module. Pass a string otherwise.",
|
||||
"autosubmit": "Automatically submits the URL to the 3rd party service."
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import logging
|
||||
from typing import List, Optional, Dict, Union, Any
|
||||
from io import BufferedIOBase
|
||||
from pathlib import Path
|
||||
|
@ -23,6 +24,9 @@ try:
|
|||
except ImportError:
|
||||
HAS_CF = False
|
||||
|
||||
configs: Dict[str, Dict[str, Any]] = {}
|
||||
logger = logging.getLogger('Lookyloo - Helpers')
|
||||
|
||||
|
||||
def get_homedir() -> Path:
|
||||
if not os.environ.get('LOOKYLOO_HOME'):
|
||||
|
@ -48,7 +52,10 @@ def get_email_template() -> str:
|
|||
return f.read()
|
||||
|
||||
|
||||
def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[str, Dict[str, Any]]:
|
||||
def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
|
||||
global configs
|
||||
if configs is not None:
|
||||
return
|
||||
if path_to_config_files:
|
||||
if isinstance(path_to_config_files, str):
|
||||
config_path = Path(path_to_config_files)
|
||||
|
@ -61,11 +68,28 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None) -> Dict[
|
|||
elif not config_path.is_dir():
|
||||
raise ConfigError(f'Configuration directory {config_path} is not a directory.')
|
||||
|
||||
to_return = {}
|
||||
configs = {}
|
||||
for path in config_path.glob('*.json'):
|
||||
with path.open() as _c:
|
||||
to_return[path.stem] = json.load(_c)
|
||||
return to_return
|
||||
configs[path.stem] = json.load(_c)
|
||||
|
||||
|
||||
def get_config(config_type: str, entry: str) -> Any:
    """Get an entry from the given config_type file. Automatic fallback to the sample file.

    :param config_type: Stem of the JSON config file to read from (the callers
        in this project use 'generic' and 'modules').
    :param entry: Top-level key to look up in that config.
    :return: The value stored under ``entry`` in the loaded config if present,
        otherwise the value from ``config/<config_type>.json.sample`` under the
        Lookyloo home directory.
    :raises KeyError: If the entry is missing from the sample file too
        (assuming the sample is a JSON object).
    """
    global configs
    # The module-level cache starts out as an empty dict, never None, so the
    # "not loaded yet" state has to be detected by emptiness. Testing
    # `configs is None` would never trigger the lazy load.
    # NOTE(review): load_configs() has its own early-return guard; it must also
    # treat an empty dict as "not loaded" for this call to populate the cache.
    if not configs:
        load_configs()
    if config_type in configs:
        if entry in configs[config_type]:
            return configs[config_type][entry]
        logger.warning(f'Unable to find {entry} in config file.')
    else:
        # Report the config type that is actually missing rather than always
        # blaming the generic one.
        logger.warning(f'No {config_type} config file available.')
    logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.')
    with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c:
        sample_config = json.load(_c)
    return sample_config[entry]
|
||||
|
||||
|
||||
def safe_create_dir(to_create: Path) -> None:
|
||||
|
|
|
@ -29,7 +29,7 @@ from scrapysplashwrapper import crawl
|
|||
from werkzeug.useragents import UserAgent
|
||||
|
||||
from .exceptions import NoValidHarFile, MissingUUID
|
||||
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content
|
||||
from .helpers import get_homedir, get_socket_path, load_cookies, get_config, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree, load_known_content
|
||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
|
||||
|
||||
|
||||
|
@ -483,10 +483,9 @@ class Lookyloo():
|
|||
|
||||
def __init__(self) -> None:
|
||||
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
||||
self.configs: Dict[str, Dict[str, Any]] = load_configs()
|
||||
self.logger.setLevel(self.get_config('loglevel'))
|
||||
self.logger.setLevel(get_config('generic', 'loglevel'))
|
||||
self.indexing = Indexing()
|
||||
self.is_public_instance = self.get_config('public_instance')
|
||||
self.is_public_instance = get_config('generic', 'public_instance')
|
||||
|
||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
|
||||
self.scrape_dir: Path = get_homedir() / 'scraped'
|
||||
|
@ -494,27 +493,23 @@ class Lookyloo():
|
|||
# In order to have a working default for the docker image, it is easier to use an environment variable
|
||||
self.splash_url: str = os.environ['SPLASH_URL_DOCKER']
|
||||
else:
|
||||
self.splash_url = self.get_config('splash_url')
|
||||
self.only_global_lookups: bool = self.get_config('only_global_lookups')
|
||||
self.splash_url = get_config('generic', 'splash_url')
|
||||
self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
|
||||
|
||||
safe_create_dir(self.scrape_dir)
|
||||
|
||||
# Initialize 3rd party components
|
||||
if 'modules' not in self.configs:
|
||||
self.logger.info('No third party components available in the config directory')
|
||||
else:
|
||||
if 'PhishingInitiative' in self.configs['modules']:
|
||||
self.pi = PhishingInitiative(self.configs['modules']['PhishingInitiative'])
|
||||
if not self.pi.available:
|
||||
self.logger.warning('Unable to setup the PhishingInitiative module')
|
||||
if 'VirusTotal' in self.configs['modules']:
|
||||
self.vt = VirusTotal(self.configs['modules']['VirusTotal'])
|
||||
if not self.vt.available:
|
||||
self.logger.warning('Unable to setup the VirusTotal module')
|
||||
if 'SaneJS' in self.configs['modules']:
|
||||
self.sanejs = SaneJavaScript(self.configs['modules']['SaneJS'])
|
||||
if not self.sanejs.available:
|
||||
self.logger.warning('Unable to setup the SaneJS module')
|
||||
self.pi = PhishingInitiative(get_config('modules', 'PhishingInitiative'))
|
||||
if not self.pi.available:
|
||||
self.logger.warning('Unable to setup the PhishingInitiative module')
|
||||
|
||||
self.vt = VirusTotal(get_config('modules', 'VirusTotal'))
|
||||
if not self.vt.available:
|
||||
self.logger.warning('Unable to setup the VirusTotal module')
|
||||
|
||||
self.sanejs = SaneJavaScript(get_config('modules', 'SaneJS'))
|
||||
if not self.sanejs.available:
|
||||
self.logger.warning('Unable to setup the SaneJS module')
|
||||
|
||||
if hasattr(self, 'sanejs') and self.sanejs.available:
|
||||
self.context = Context(self.sanejs)
|
||||
|
@ -633,20 +628,6 @@ class Lookyloo():
|
|||
remove_pickle_tree(capture_dir)
|
||||
self.rebuild_cache()
|
||||
|
||||
def get_config(self, entry: str) -> Any:
|
||||
"""Get an entry from the generic config file. Automatic fallback to the sample file"""
|
||||
if 'generic' in self.configs:
|
||||
if entry in self.configs['generic']:
|
||||
return self.configs['generic'][entry]
|
||||
else:
|
||||
self.logger.warning(f'Unable to find {entry} in config file.')
|
||||
else:
|
||||
self.logger.warning('No generic config file available.')
|
||||
self.logger.warning('Falling back on sample config, please initialize the generic config file.')
|
||||
with (get_homedir() / 'config' / 'generic.json.sample').open() as _c:
|
||||
sample_config = json.load(_c)
|
||||
return sample_config[entry]
|
||||
|
||||
def get_urlnode_from_tree(self, capture_uuid: str, node_uuid: str) -> URLNode:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
|
@ -873,7 +854,7 @@ class Lookyloo():
|
|||
return False
|
||||
|
||||
def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
|
||||
if not self.get_config('enable_mail_notification'):
|
||||
if not get_config('generic', 'enable_mail_notification'):
|
||||
return
|
||||
|
||||
redirects = ''
|
||||
|
@ -887,7 +868,7 @@ class Lookyloo():
|
|||
else:
|
||||
redirects = "No redirects."
|
||||
|
||||
email_config = self.get_config('email')
|
||||
email_config = get_config('generic', 'email')
|
||||
msg = EmailMessage()
|
||||
msg['From'] = email_config['from']
|
||||
if email:
|
||||
|
@ -991,15 +972,15 @@ class Lookyloo():
|
|||
cookies = load_cookies(cookies_pseudofile)
|
||||
if not user_agent:
|
||||
# Catch case where the UA is broken on the UI, and the async submission.
|
||||
ua: str = self.get_config('default_user_agent') # type: ignore
|
||||
ua: str = get_config('generic', 'default_user_agent') # type: ignore
|
||||
else:
|
||||
ua = user_agent
|
||||
|
||||
if int(depth) > int(self.get_config('max_depth')): # type: ignore
|
||||
self.logger.warning(f'Not allowed to scrape on a depth higher than {self.get_config("max_depth")}: {depth}')
|
||||
depth = int(self.get_config('max_depth')) # type: ignore
|
||||
if int(depth) > int(get_config('generic', 'max_depth')): # type: ignore
|
||||
self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}')
|
||||
depth = int(get_config('generic', 'max_depth')) # type: ignore
|
||||
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
|
||||
referer=referer, log_enabled=True, log_level=self.get_config('splash_loglevel'))
|
||||
referer=referer, log_enabled=True, log_level=get_config('generic', 'splash_loglevel'))
|
||||
if not items:
|
||||
# broken
|
||||
return False
|
||||
|
|
|
@ -690,7 +690,7 @@ description = "Pygments is a syntax highlighting package written in Python."
|
|||
name = "pygments"
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
version = "2.7.0"
|
||||
version = "2.7.1"
|
||||
|
||||
[[package]]
|
||||
category = "main"
|
||||
|
@ -887,7 +887,7 @@ description = "Scrapy splash wrapper as a standalone library."
|
|||
name = "scrapysplashwrapper"
|
||||
optional = false
|
||||
python-versions = ">=3.7,<4.0"
|
||||
version = "1.2.3"
|
||||
version = "1.2.4"
|
||||
|
||||
[package.dependencies]
|
||||
scrapy = ">=1.8.0,<2.0.0"
|
||||
|
@ -1566,8 +1566,8 @@ pyeupi = [
|
|||
{file = "pyeupi-1.1.tar.gz", hash = "sha256:2309c61ac2ef0eafabd6e9f32a0078069ffbba0e113ebc6b51cffc1869094472"},
|
||||
]
|
||||
pygments = [
|
||||
{file = "Pygments-2.7.0-py3-none-any.whl", hash = "sha256:2df50d16b45b977217e02cba6c8422aaddb859f3d0570a88e09b00eafae89c6e"},
|
||||
{file = "Pygments-2.7.0.tar.gz", hash = "sha256:2594e8fdb06fef91552f86f4fd3a244d148ab24b66042036e64f29a291515048"},
|
||||
{file = "Pygments-2.7.1-py3-none-any.whl", hash = "sha256:307543fe65c0947b126e83dd5a61bd8acbd84abec11f43caebaf5534cbc17998"},
|
||||
{file = "Pygments-2.7.1.tar.gz", hash = "sha256:926c3f319eda178d1bd90851e4317e6d8cdb5e292a3386aac9bd75eca29cf9c7"},
|
||||
]
|
||||
pyhamcrest = [
|
||||
{file = "PyHamcrest-2.0.2-py3-none-any.whl", hash = "sha256:7ead136e03655af85069b6f47b23eb7c3e5c221aa9f022a4fbb499f5b7308f29"},
|
||||
|
@ -1625,8 +1625,8 @@ scrapy-splash = [
|
|||
{file = "scrapy_splash-0.7.2-py2.py3-none-any.whl", hash = "sha256:71ac958370f8732fec746a25a8235b03a4d3c4c93a59be51aa8e910a08cfe511"},
|
||||
]
|
||||
scrapysplashwrapper = [
|
||||
{file = "scrapysplashwrapper-1.2.3-py3-none-any.whl", hash = "sha256:527fa816517ac260b029b7c045101493a5cce10b1664e2fc07d723ba26531d7e"},
|
||||
{file = "scrapysplashwrapper-1.2.3.tar.gz", hash = "sha256:2dc99037a6c72f9d796c6ba57e69abeaebbaca6a13cfca41d9ac79d66ef26c83"},
|
||||
{file = "scrapysplashwrapper-1.2.4-py3-none-any.whl", hash = "sha256:11930ef076d0fdba66caa6045f8c9c142247cb4eb0ba0939e08306a5b4e879bf"},
|
||||
{file = "scrapysplashwrapper-1.2.4.tar.gz", hash = "sha256:d1185c760dde1bcda389223545ea8fe16bd9308e52c9baa52f654fb91c7a787f"},
|
||||
]
|
||||
service-identity = [
|
||||
{file = "service_identity-18.1.0-py2.py3-none-any.whl", hash = "sha256:001c0707759cb3de7e49c078a7c0c9cd12594161d3bf06b9c254fdcb1a60dc36"},
|
||||
|
|
|
@ -13,7 +13,7 @@ from flask import Flask, render_template, request, send_file, redirect, url_for,
|
|||
from flask_bootstrap import Bootstrap # type: ignore
|
||||
from flask_httpauth import HTTPDigestAuth # type: ignore
|
||||
|
||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
|
||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config
|
||||
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
||||
from .proxied import ReverseProxied
|
||||
|
@ -42,11 +42,11 @@ auth = HTTPDigestAuth()
|
|||
|
||||
lookyloo: Lookyloo = Lookyloo()
|
||||
|
||||
user = lookyloo.get_config('cache_clean_user')
|
||||
time_delta_on_index = lookyloo.get_config('time_delta_on_index')
|
||||
blur_screenshot = lookyloo.get_config('enable_default_blur_screenshot')
|
||||
user = get_config('generic', 'cache_clean_user')
|
||||
time_delta_on_index = get_config('generic', 'time_delta_on_index')
|
||||
blur_screenshot = get_config('generic', 'enable_default_blur_screenshot')
|
||||
|
||||
logging.basicConfig(level=lookyloo.get_config('loglevel'))
|
||||
logging.basicConfig(level=get_config('generic', 'loglevel'))
|
||||
|
||||
|
||||
# Method to make sizes in bytes human readable
|
||||
|
@ -140,7 +140,7 @@ def scrape_web():
|
|||
os=request.form.get('os'), browser=request.form.get('browser'))
|
||||
return redirect(url_for('tree', tree_uuid=perma_uuid))
|
||||
user_agents: Dict[str, Any] = {}
|
||||
if lookyloo.get_config('use_user_agents_users'):
|
||||
if get_config('generic', 'use_user_agents_users'):
|
||||
lookyloo.build_ua_file()
|
||||
# NOTE: For now, just generate the file, so we have an idea of the size
|
||||
# user_agents = get_user_agents('own_user_agents')
|
||||
|
@ -191,7 +191,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
|
|||
keys_request = {
|
||||
'request_cookie': "/static/cookie_read.png",
|
||||
}
|
||||
if lookyloo.get_config('enable_context_by_users'):
|
||||
if get_config('generic', 'enable_context_by_users'):
|
||||
enable_context_by_users = True
|
||||
else:
|
||||
enable_context_by_users = False
|
||||
|
@ -250,11 +250,18 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
|
|||
|
||||
if isinstance(posted, bytes):
|
||||
to_return = BytesIO(posted)
|
||||
is_blob = True
|
||||
else:
|
||||
to_return = BytesIO(posted.encode())
|
||||
is_blob = False
|
||||
to_return.seek(0)
|
||||
return send_file(to_return, mimetype='text/plain',
|
||||
as_attachment=True, attachment_filename='posted_data.txt')
|
||||
|
||||
if is_blob:
|
||||
return send_file(to_return, mimetype='application/octet-stream',
|
||||
as_attachment=True, attachment_filename='posted_data.bin')
|
||||
else:
|
||||
return send_file(to_return, mimetype='text/plain',
|
||||
as_attachment=True, attachment_filename='posted_data.txt')
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
|
||||
|
@ -407,11 +414,11 @@ def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
|
|||
flash(cache['error'], 'error')
|
||||
|
||||
try:
|
||||
if lookyloo.get_config('enable_mail_notification'):
|
||||
if get_config('generic', 'enable_mail_notification'):
|
||||
enable_mail_notification = True
|
||||
else:
|
||||
enable_mail_notification = False
|
||||
if lookyloo.get_config('enable_context_by_users'):
|
||||
if get_config('generic', 'enable_context_by_users'):
|
||||
enable_context_by_users = True
|
||||
else:
|
||||
enable_context_by_users = False
|
||||
|
|
|
@ -189,6 +189,9 @@
|
|||
across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
|
||||
{{ get_ressource_button(tree_uuid, url['url_object'].uuid, hash, 'Download the embedded ressource') }}
|
||||
</br>
|
||||
{% if enable_context_by_users %}
|
||||
{{ context_form(tree_uuid, url['url_object'].uuid, hostnode_uuid, hash, 'hostnode_popup') }}
|
||||
{% endif %}
|
||||
|
||||
{% if 'other_captures' in details %}
|
||||
{{ indexed_hash(details['other_captures'], hash) }}
|
||||
|
|
Loading…
Reference in New Issue