From 7324c8b08506e96bc3e4d430150ab198a93b57f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 25 Jun 2024 12:41:18 +0200 Subject: [PATCH] chg: Improve recaptures (WiP) --- lookyloo/capturecache.py | 11 +++-------- lookyloo/helpers.py | 9 +++++++++ website/web/__init__.py | 13 ++++++++----- website/web/templates/capture.html | 8 +++++--- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index 38e6c65b..b43a8609 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -28,7 +28,7 @@ from pyipasnhistory import IPASNHistory # type: ignore[attr-defined] from redis import Redis from .context import Context -from .helpers import get_captures_dir, is_locked +from .helpers import get_captures_dir, is_locked, load_capture_settings from .indexing import Indexing from .default import LookylooException, try_make_file, get_config from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, TreeNeedsRebuild @@ -413,12 +413,7 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] raise MissingCaptureDirectory(f'Unable to find the UUID file in {capture_dir}.') # Get capture settings as they were submitted - capture_settings_file = capture_dir / 'capture_settings.json' - if capture_settings_file.exists(): - with capture_settings_file.open() as f: - capture_settings = json.load(f) - else: - capture_settings = {} + capture_settings = load_capture_settings(capture_dir) logger = LookylooCacheLogAdapter(self.logger, {'uuid': uuid}) try: @@ -435,7 +430,7 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] tree = None cache: dict[str, str | int] = {'uuid': uuid, 'capture_dir': capture_dir_str} - if capture_settings.get('url'): + if capture_settings.get('url') and capture_settings['url'] is not None: cache['url'] = capture_settings['url'] if (capture_dir / 'error.txt').exists(): diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index f58116ea..90f7932e 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -82,6 +82,15 @@ def get_email_template() -> str: return f.read() +@lru_cache(256) +def load_capture_settings(capture_dir: Path) -> CaptureSettings: + capture_settings_file = capture_dir / 'capture_settings.json' + if capture_settings_file.exists(): + with capture_settings_file.open() as f: + return json.load(f) + return {} + + @lru_cache def load_takedown_filters() -> tuple[re.Pattern[str], re.Pattern[str], dict[str, list[str]]]: filter_ini_file = get_homedir() / 'config' / 'takedown_filters.ini' diff --git a/website/web/__init__.py b/website/web/__init__.py index ffbb3dd1..fb42989c 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -41,7 +41,9 @@ from werkzeug.wrappers.response import Response as WerkzeugResponse from lookyloo import Lookyloo, CaptureSettings from lookyloo.default import get_config from lookyloo.exceptions import MissingUUID, NoValidHarFile, LacusUnreachable -from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies, UserCaptureSettings, load_user_config +from lookyloo.helpers import (get_taxonomies, UserAgents, load_cookies, + UserCaptureSettings, load_user_config, + load_capture_settings) if sys.version_info < (3, 9): from pytz import all_timezones_set @@ -1424,14 +1426,14 @@ def search() -> str | Response | WerkzeugResponse: return render_template('search.html') -def _prepare_capture_template(user_ua: str | None, predefined_url: str | None=None, *, +def _prepare_capture_template(user_ua: str | None, predefined_settings: CaptureSettings | None=None, *, user_config: UserCaptureSettings | None=None) -> str: return render_template('capture.html', user_agents=user_agents.user_agents, default=user_agents.default, personal_ua=user_ua, default_public=get_config('generic', 'default_public'), devices=lookyloo.get_playwright_devices(), - predefined_url_to_capture=predefined_url if predefined_url else '', + predefined_settings=predefined_settings if predefined_settings else {}, user_config=user_config, show_project_page=get_config('generic', 'show_project_page'), version=pkg_version, @@ -1441,9 +1443,10 @@ def _prepare_capture_template(user_ua: str | None, predefined_url: str | None=No @app.route('/recapture/', methods=['GET']) def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse: cache = lookyloo.capture_cache(tree_uuid) - if cache and hasattr(cache, 'url'): + if cache and hasattr(cache, 'capture_dir'): + capture_settings = load_capture_settings(cache.capture_dir) return _prepare_capture_template(user_ua=request.headers.get('User-Agent'), - predefined_url=cache.url) + predefined_settings=capture_settings) flash(f'Unable to find the capture {tree_uuid} in the cache.', 'error') return _prepare_capture_template(user_ua=request.headers.get('User-Agent')) diff --git a/website/web/templates/capture.html b/website/web/templates/capture.html index 885f2bc2..6bee1507 100644 --- a/website/web/templates/capture.html +++ b/website/web/templates/capture.html @@ -82,7 +82,8 @@
- +
@@ -102,7 +103,7 @@
+ placeholder="URL to capture" value="{{predefined_settings.get('url', '')}}" required> @@ -280,7 +281,8 @@
- +
We'll attempt to click on the button allowing the website captured to violate your privacy.