diff --git a/lookyloo/__init__.py b/lookyloo/__init__.py index 4f20e504..d46c3514 100644 --- a/lookyloo/__init__.py +++ b/lookyloo/__init__.py @@ -2,7 +2,8 @@ import logging from .context import Context # noqa from .indexing import Indexing # noqa -from .lookyloo import Lookyloo, CaptureSettings # noqa +from .helpers import CaptureSettings # noqa +from .lookyloo import Lookyloo # noqa logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index fce2800e..1d929f2a 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -291,7 +291,7 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] if hasattr(cc, 'timestamp'): recent_captures[uuid] = cc.timestamp.timestamp() if recent_captures: - self.redis.zadd('recent_captures', recent_captures) + self.redis.zadd('recent_captures', recent_captures, nx=True) def _get_capture_dir(self, uuid: str) -> str: # Try to get from the recent captures cache in redis diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index e11eb18b..f58116ea 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -20,6 +20,7 @@ from urllib.parse import urlparse from har2tree import CrawledTree, HostNode, URLNode +from lacuscore import CaptureSettings as LacuscoreCaptureSettings from playwrightcapture import get_devices from publicsuffixlist import PublicSuffixList # type: ignore[import-untyped] from pytaxonomies import Taxonomies # type: ignore[attr-defined] @@ -392,3 +393,30 @@ class ParsedUserAgent(UserAgent): def __str__(self) -> str: return f'OS: {self.platform} - Browser: {self.browser} {self.version} - UA: {self.string}' + + +class CaptureSettings(LacuscoreCaptureSettings, total=False): + '''The capture settings that can be passed to Lookyloo''' + listing: int | None + not_queued: int | None + auto_report: bool | str | dict[str, str] | None # {'email': , 'comment': , 'recipient_mail':} + dnt: str | None + browser_name: str | None + os: str | None + parent: str | None + + +# overwrite set to True means the settings in the config file overwrite the settings +# provided by the user. False will simply append the settings from the config file if they +# don't exist. +class UserCaptureSettings(CaptureSettings, total=False): + overwrite: bool + + +@lru_cache(64) +def load_user_config(username: str) -> UserCaptureSettings | None: + user_config_path = get_homedir() / 'config' / 'users' / f'{username}.json' + if not user_config_path.exists(): + return None + with user_config_path.open() as _c: + return json.load(_c) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index b2d1b0c8..add33736 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -16,7 +16,6 @@ import time from collections import defaultdict from datetime import date, datetime, timedelta, timezone -from functools import lru_cache from email.message import EmailMessage from functools import cached_property from io import BytesIO @@ -34,7 +33,8 @@ from lacuscore import (LacusCore, CaptureStatus as CaptureStatusCore, # CaptureResponse as CaptureResponseCore) # CaptureResponseJson as CaptureResponseJsonCore, - CaptureSettings as CaptureSettingsCore) + # CaptureSettings as CaptureSettingsCore + ) from PIL import Image, UnidentifiedImageError from playwrightcapture import get_devices from puremagic import from_string, PureError # type: ignore[import-untyped] @@ -58,7 +58,8 @@ from .exceptions import (MissingCaptureDirectory, from .helpers import (get_captures_dir, get_email_template, get_resources_hashes, get_taxonomies, uniq_domains, ParsedUserAgent, load_cookies, UserAgents, - get_useragent_for_requests, load_takedown_filters + get_useragent_for_requests, load_takedown_filters, + CaptureSettings, UserCaptureSettings, load_user_config ) from .modules import (MISPs, PhishingInitiative, UniversalWhois, UrlScan, VirusTotal, Phishtank, Hashlookup, @@ -68,33 +69,6 @@ if TYPE_CHECKING: from playwright.async_api import Cookie -class CaptureSettings(CaptureSettingsCore, total=False): - '''The capture settings that can be passed to Lookyloo''' - listing: int | None - not_queued: int | None - auto_report: bool | str | dict[str, str] | None # {'email': , 'comment': , 'recipient_mail':} - dnt: str | None - browser_name: str | None - os: str | None - parent: str | None - - -# overwrite set to True means the settings in the config file overwrite the settings -# provided by the user. False will simply append the settings from the config file if they -# don't exist. -class UserCaptureSettings(CaptureSettings, total=False): - overwrite: bool - - -@lru_cache(64) -def load_user_config(username: str) -> UserCaptureSettings | None: - user_config_path = get_homedir() / 'config' / 'users' / f'{username}.json' - if not user_config_path.exists(): - return None - with user_config_path.open() as _c: - return json.load(_c) - - class Lookyloo(): def __init__(self, cache_max_size: int | None=None) -> None: diff --git a/website/web/__init__.py b/website/web/__init__.py index dbc846b2..4c5b1af0 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -41,7 +41,7 @@ from werkzeug.wrappers.response import Response as WerkzeugResponse from lookyloo import Lookyloo, CaptureSettings from lookyloo.default import get_config from lookyloo.exceptions import MissingUUID, NoValidHarFile, LacusUnreachable -from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies +from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies, UserCaptureSettings, load_user_config if sys.version_info < (3, 9): from pytz import all_timezones_set @@ -52,7 +52,7 @@ else: from .genericapi import api as generic_api from .helpers import (User, build_users_table, get_secret_key, load_user_from_request, src_request_ip, sri_load, - get_lookyloo_instance, get_indexing) + get_lookyloo_instance, get_indexing, build_keys_table) from .proxied import ReverseProxied logging.config.dictConfig(get_config('logging')) @@ -73,6 +73,7 @@ pkg_version = version('lookyloo') # Auth stuff login_manager = flask_login.LoginManager() login_manager.init_app(app) +build_keys_table() # User agents manager user_agents = UserAgents() @@ -1314,6 +1315,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: str cached.redirects)) titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True) return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain, + show_hidden=show_hidden, show_project_page=get_config('generic', 'show_project_page'), version=pkg_version) @@ -1422,13 +1424,14 @@ def search() -> str | Response | WerkzeugResponse: return render_template('search.html') -def _prepare_capture_template(user_ua: str | None, predefined_url: str | None=None) -> str: +def _prepare_capture_template(user_ua: str | None, predefined_url: str | None=None, *, user_config: UserCaptureSettings | None=None) -> str: return render_template('capture.html', user_agents=user_agents.user_agents, default=user_agents.default, personal_ua=user_ua, default_public=get_config('generic', 'default_public'), devices=lookyloo.get_playwright_devices(), predefined_url_to_capture=predefined_url if predefined_url else '', + user_config=user_config, has_global_proxy=True if lookyloo.global_proxy else False) @@ -1496,8 +1499,10 @@ def submit_capture() -> str | Response | WerkzeugResponse: @app.route('/capture', methods=['GET', 'POST']) def capture_web() -> str | Response | WerkzeugResponse: + user_config: UserCaptureSettings | None = None if flask_login.current_user.is_authenticated: user = flask_login.current_user.get_id() + user_config = load_user_config(user) else: user = src_request_ip(request) @@ -1609,7 +1614,7 @@ def capture_web() -> str | Response | WerkzeugResponse: return redirect(url_for('tree', tree_uuid=perma_uuid)) # render template - return _prepare_capture_template(user_ua=request.headers.get('User-Agent')) + return _prepare_capture_template(user_ua=request.headers.get('User-Agent'), user_config=user_config) @app.route('/simple_capture', methods=['GET', 'POST']) diff --git a/website/web/genericapi.py b/website/web/genericapi.py index 09236205..9f453391 100644 --- a/website/web/genericapi.py +++ b/website/web/genericapi.py @@ -14,7 +14,7 @@ from zipfile import ZipFile import flask_login # type: ignore[import-untyped] from flask import request, send_file, Response -from flask_restx import Namespace, Resource, fields # type: ignore[import-untyped] +from flask_restx import Namespace, Resource, fields, abort # type: ignore[import-untyped] from werkzeug.security import check_password_hash from lacuscore import CaptureStatus as CaptureStatusCore @@ -22,8 +22,10 @@ from pylacus import CaptureStatus as CaptureStatusPy from lookyloo import CaptureSettings, Lookyloo from lookyloo.comparator import Comparator from lookyloo.exceptions import MissingUUID, NoValidHarFile +from lookyloo.helpers import load_user_config, UserCaptureSettings -from .helpers import build_users_table, load_user_from_request, src_request_ip, get_lookyloo_instance, get_indexing +from .helpers import (build_users_table, load_user_from_request, src_request_ip, + get_lookyloo_instance, get_indexing) api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/') @@ -34,7 +36,7 @@ comparator: Comparator = Comparator() def api_auth_check(method): # type: ignore[no-untyped-def] if flask_login.current_user.is_authenticated or load_user_from_request(request): return method - return 'Authentication required.', 403 + abort(403, 'Authentication required.') token_request_fields = api.model('AuthTokenFields', { @@ -49,6 +51,17 @@ def handle_no_HAR_file_exception(error: Any) -> tuple[dict[str, str], int]: return {'message': str(error)}, 400 +@api.route('/json/get_user_config') +@api.doc(description='Get the configuration of the user (if any)', security='apikey') +class UserConfig(Resource): # type: ignore[misc] + method_decorators = [api_auth_check] + + def get(self) -> UserCaptureSettings | None | tuple[dict[str, str], int]: + if not flask_login.current_user.is_authenticated: + return {'error': 'User not authenticated.'}, 401 + return load_user_config(flask_login.current_user.get_id()) + + @api.route('/json/get_token') @api.doc(description='Get the API token required for authenticated calls') class AuthToken(Resource): # type: ignore[misc] diff --git a/website/web/helpers.py b/website/web/helpers.py index 6d15aa3d..ba3841f2 100644 --- a/website/web/helpers.py +++ b/website/web/helpers.py @@ -14,7 +14,7 @@ from flask import Request from werkzeug.security import generate_password_hash from lookyloo import Lookyloo, Indexing -from lookyloo.default import get_config, get_homedir +from lookyloo.default import get_config, get_homedir, LookylooException __global_lookyloo_instance = None @@ -57,9 +57,12 @@ def is_valid_username(username: str) -> bool: @lru_cache(64) def build_keys_table() -> dict[str, str]: - keys_table = {} + keys_table: dict[str, str] = {} for username, authstuff in build_users_table().items(): if 'authkey' in authstuff: + if authstuff['authkey'] in keys_table: + existing_user = keys_table[authstuff['authkey']] + raise LookylooException(f'Duplicate authkey found for {existing_user} and {username}.') keys_table[authstuff['authkey']] = username return keys_table @@ -85,7 +88,7 @@ def build_users_table() -> dict[str, dict[str, str]]: users_table[username] = {} users_table[username]['password'] = generate_password_hash(authstuff) users_table[username]['authkey'] = hashlib.pbkdf2_hmac('sha256', get_secret_key(), - authstuff.encode(), + f'{username}{authstuff}'.encode(), 100000).hex() elif isinstance(authstuff, list) and len(authstuff) == 2: diff --git a/website/web/templates/capture.html b/website/web/templates/capture.html index ecae2c44..b8ca8db1 100644 --- a/website/web/templates/capture.html +++ b/website/web/templates/capture.html @@ -29,6 +29,37 @@ {{ render_messages(container=True, dismissible=True) }} + {% if current_user.is_authenticated %} +
You are logged-in as {{ current_user.id }} + + {% if user_config %} + {% if user_config['overwrite'] == true %} + The settings in your users configuration file will overwrite the settings you configure in the form below. + {% else %} + The settings in your users configuration file will only be used if you don't overwrite them in the form below. + {% endif %} +
+