From ce9ecfefdb6871f85c7a0a00758aff4b35a2c71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Sat, 11 Jun 2022 14:49:58 +0200 Subject: [PATCH] chg: remove reference to max depth (not used since playwright), cleanup --- config/generic.json.sample | 2 -- lookyloo/helpers.py | 11 ++++++----- website/web/__init__.py | 6 +++--- website/web/templates/capture.html | 13 ------------- 4 files changed, 9 insertions(+), 23 deletions(-) diff --git a/config/generic.json.sample b/config/generic.json.sample index 18fd0908..8d1e3c9e 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -13,7 +13,6 @@ "days": 0, "hours": 0 }, - "max_depth": 1, "async_capture_processes": 1, "use_user_agents_users": false, "enable_default_blur_screenshot": false, @@ -57,7 +56,6 @@ "default_public": "If true, the capture is public and will be visible on the index page by default (can be unticked on the capture page).", "users": "It is some kind of an admin accounts. Format: {username: password}", "time_delta_on_index": "Time interval of the capture displayed on the index", - "max_depth": "Maximum depth for scraping. Anything > 1 will be exponentially bigger.", "async_capture_processes": "Number of async_capture processes to start. This should not be higher than the number of splash instances you have running. A very high number will use *a lot* of ram.", "use_user_agents_users": "Only usable for medium/high use instances: use the user agents of the users of the platform", "enable_default_blur_screenshot": "If true, blur the screenshot by default (useful on public instances)", diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index fdcf1540..7fb8303c 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -94,7 +94,10 @@ class UserAgents: self.path = get_homedir() / 'user_agents' ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True) - self.most_recent_ua_path = ua_files_path[0] + self._load_newest_ua_file(ua_files_path[0]) + + def _load_newest_ua_file(self, path: Path): + self.most_recent_ua_path = path with self.most_recent_ua_path.open() as f: self.most_recent_uas = json.load(f) self.by_freq = self.most_recent_uas.pop('by_frequency') @@ -103,10 +106,7 @@ class UserAgents: def user_agents(self) -> Dict[str, Dict[str, List[str]]]: ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True) if ua_files_path[0] != self.most_recent_ua_path: - self.most_recent_ua_path = ua_files_path[0] - with self.most_recent_ua_path.open() as f: - self.most_recent_uas = json.load(f) - self.by_freq = self.most_recent_uas.pop('by_frequency') + self._load_newest_ua_file(ua_files_path[0]) return self.most_recent_uas @property @@ -120,6 +120,7 @@ class UserAgents: return ua raise Exception('Erros with the User agents.') + def load_known_content(directory: str='known_content') -> Dict[str, Dict[str, Any]]: to_return: Dict[str, Dict[str, Any]] = {} for known_content_file in (get_homedir() / directory).glob('*.json'): diff --git a/website/web/__init__.py b/website/web/__init__.py index d55e2259..0a69802c 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -105,7 +105,6 @@ lookyloo: Lookyloo = Lookyloo() time_delta_on_index = get_config('generic', 'time_delta_on_index') blur_screenshot = get_config('generic', 'enable_default_blur_screenshot') -max_depth = get_config('generic', 'max_depth') use_own_ua = get_config('generic', 'use_user_agents_users') enable_mail_notification = get_config('generic', 'enable_mail_notification') @@ -801,7 +800,7 @@ def search(): def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[str]=None): return render_template('capture.html', user_agents=user_agents.user_agents, default=user_agents.default, - max_depth=max_depth, personal_ua=user_ua, + personal_ua=user_ua, default_public=get_config('generic', 'default_public'), predefined_url_to_capture=predefined_url if predefined_url else '') @@ -810,7 +809,8 @@ def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[s def recapture(tree_uuid: str): cache = lookyloo.capture_cache(tree_uuid) if cache: - return _prepare_capture_template(user_ua=request.headers.get('User-Agent'), predefined_url=cache.url) + return _prepare_capture_template(user_ua=request.headers.get('User-Agent'), + predefined_url=cache.url) flash(f'Unable to find the capture {tree_uuid} in the cache.', 'error') return _prepare_capture_template(user_ua=request.headers.get('User-Agent')) diff --git a/website/web/templates/capture.html b/website/web/templates/capture.html index 539064c2..7846faed 100644 --- a/website/web/templates/capture.html +++ b/website/web/templates/capture.html @@ -150,19 +150,6 @@
- {% if max_depth >= 2 %} -
- -
- -
-
- {%endif%} -