diff --git a/config/generic.json.sample b/config/generic.json.sample index 1a0c59a3..2cd5cec0 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -51,6 +51,7 @@ "hide_captures_with_error": false, "archive": 180, "max_capture_time": 3600, + "max_tree_create_time": 120, "_notes": { "loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels", "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network", @@ -77,6 +78,7 @@ "priority": "Define the priority of a new capture. A capture from the web interface has priority over a capture from the API, same for authenticated user vs. anonymous.", "hide_captures_with_error": "Capturing an URL may result in an error (domain non-existent, HTTP error, ...). They may be useful to see, but if you have a public instance, they will clutter the index.", "archive": "The captures older than this value (in days) will be archived. They're not cached by default in the Lookyloo class.", - "max_capture_time": "The very maximal time we allow a capture to keep going. Should only be triggered by captures that cause playwright to never quit." + "max_capture_time": "The very maximal time we allow a capture to keep going. 
Should only be triggered by captures that cause playwright to never quit.",
+        "max_tree_create_time": "The max time (in seconds) the generation of a tree is allowed to take. Set to 0 to disable the limit."
     }
 }
diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py
index cd9e87a1..d21f98f4 100644
--- a/lookyloo/capturecache.py
+++ b/lookyloo/capturecache.py
@@ -1,9 +1,11 @@
 #!/usr/bin/env python3
 
+import contextlib
 import json
 import logging
 import os
 import pickle
+import signal
 import sys
 import time
 
@@ -105,6 +107,7 @@ class CapturesIndex(Mapping):
         self.contextualizer = contextualizer
         self.__cache: Dict[str, CaptureCache] = {}
         self._quick_init()
+        self.timeout = get_config('generic', 'max_tree_create_time')
 
     @property
     def cached_captures(self) -> Set[str]:
@@ -211,7 +214,8 @@ class CapturesIndex(Mapping):
         if not (har_files := sorted(capture_dir.glob('*.har'))):
             har_files = sorted(capture_dir.glob('*.har.gz'))
         try:
-            tree = CrawledTree(har_files, uuid)
+            with self._timeout_context():
+                tree = CrawledTree(har_files, uuid)
             self.__resolve_dns(tree)
             if self.contextualizer:
                 self.contextualizer.contextualize_tree(tree)
@@ -220,6 +224,11 @@ class CapturesIndex(Mapping):
             for har_file in har_files:
                 har_file.rename(har_file.with_suffix('.broken'))
             raise NoValidHarFile(f'We got har files, but they are broken: {e}')
+        except TimeoutError:
+            self.logger.warning(f'Unable to rebuild the tree for {capture_dir}, the tree took too long.')
+            for har_file in har_files:
+                har_file.rename(har_file.with_suffix('.broken'))
+            raise NoValidHarFile(f'We got har files, but creating a tree took more than {self.timeout}s.')
         except RecursionError as e:
             raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
         else:
@@ -238,6 +247,34 @@ class CapturesIndex(Mapping):
             lock_file.unlink(missing_ok=True)
         return tree
 
+    @staticmethod
+    def _raise_timeout(_, __):
+        '''Signal handler: turn the received signal into a TimeoutError.'''
+        raise TimeoutError
+
+    @contextlib.contextmanager
+    def _timeout_context(self):
+        '''Raise a TimeoutError if the enclosed block runs longer than self.timeout seconds.
+
+        A timeout of 0 (or unset) disables the limit.
+        NOTE: relies on SIGALRM, so it is Unix only and must be used from the main thread.'''
+        if not self.timeout:
+            yield
+            return
+        # Register a function to raise a TimeoutError on the signal.
+        previous_handler = signal.signal(signal.SIGALRM, self._raise_timeout)
+        signal.alarm(self.timeout)
+        try:
+            yield
+        finally:
+            # Cancel the pending alarm so it cannot fire once the block is done,
+            # then put back the handler that was installed before this context.
+            signal.alarm(0)
+            if previous_handler is None:
+                # The previous handler was not installed from Python and cannot be restored.
+                previous_handler = signal.SIG_IGN
+            signal.signal(signal.SIGALRM, previous_handler)
+
     def _set_capture_cache(self, capture_dir_str: str) -> CaptureCache:
         '''Populate the redis cache for a capture. Mostly used on the index page.
         NOTE: Doesn't require the pickle.'''
@@ -254,7 +291,7 @@ class CapturesIndex(Mapping):
                 tree = self._create_pickle(capture_dir)
                 self.indexing.new_internal_uuids(tree)
             except NoValidHarFile:
-                self.logger.info('Unable to rebuild the tree, the HAR files are broken.')
+                self.logger.warning(f'Unable to rebuild the tree for {capture_dir}, the HAR files are broken.')
 
         cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
         if (capture_dir / 'error.txt').exists():
diff --git a/poetry.lock b/poetry.lock
index aebe1b32..959894d9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -450,7 +450,7 @@ python-versions = ">=3.5"
 
 [[package]]
 name = "importlib-metadata"
-version = "5.0.0"
+version = "5.1.0"
 description = "Read metadata from Python packages"
 category = "main"
 optional = false
@@ -1141,7 +1141,7 @@ jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"]
 
 [[package]]
 name = "setuptools"
-version = "65.6.0"
+version = "65.6.3"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 category = "main"
 optional = false
@@ -1328,11 +1328,11 @@ python-versions = "*"
 
 [[package]]
 name = "urllib3"
-version = "1.26.12"
+version = "1.26.13"
 description = "HTTP library with thread-safe connection pooling, file post, and more.
category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" [package.extras] brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] @@ -1419,7 +1419,7 @@ multidict = ">=4.0" [[package]] name = "zipp" -version = "3.10.0" +version = "3.11.0" description = "Backport of pathlib-compatible object wrapper for zip files" category = "main" optional = false @@ -1883,8 +1883,8 @@ idna = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] importlib-metadata = [ - {file = "importlib_metadata-5.0.0-py3-none-any.whl", hash = "sha256:ddb0e35065e8938f867ed4928d0ae5bf2a53b7773871bfe6bcc7e4fcdc7dea43"}, - {file = "importlib_metadata-5.0.0.tar.gz", hash = "sha256:da31db32b304314d044d3c12c79bd59e307889b287ad12ff387b3500835fc2ab"}, + {file = "importlib_metadata-5.1.0-py3-none-any.whl", hash = "sha256:d84d17e21670ec07990e1044a99efe8d615d860fd176fc29ef5c306068fda313"}, + {file = "importlib_metadata-5.1.0.tar.gz", hash = "sha256:d5059f9f1e8e41f80e9c56c2ee58811450c31984dfa625329ffd7c0dad88a73b"}, ] importlib-resources = [ {file = "importlib_resources-5.10.0-py3-none-any.whl", hash = "sha256:ee17ec648f85480d523596ce49eae8ead87d5631ae1551f913c0100b5edd3437"}, @@ -2547,8 +2547,8 @@ rich = [ {file = "rich-12.6.0.tar.gz", hash = "sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0"}, ] setuptools = [ - {file = "setuptools-65.6.0-py3-none-any.whl", hash = "sha256:6211d2f5eddad8757bd0484923ca7c0a6302ebc4ab32ea5e94357176e0ca0840"}, - {file = "setuptools-65.6.0.tar.gz", hash = "sha256:d1eebf881c6114e51df1664bc2c9133d022f78d12d5f4f665b9191f084e2862d"}, + {file = "setuptools-65.6.3-py3-none-any.whl", hash = "sha256:57f6f22bde4e042978bcd50176fdb381d7c21a9efa4041202288d3737a0c6a54"}, + {file = "setuptools-65.6.3.tar.gz", hash = 
"sha256:a7620757bf984b58deaf32fc8a4577a9bbc0850cf92c20e1ce41c38c19e5fb75"}, ] six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, @@ -2623,8 +2623,8 @@ ua-parser = [ {file = "ua_parser-0.16.1-py2.py3-none-any.whl", hash = "sha256:f97126300df8ac0f8f2c9d8559669532d626a1af529265fd253cba56e73ab36e"}, ] urllib3 = [ - {file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"}, - {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"}, + {file = "urllib3-1.26.13-py2.py3-none-any.whl", hash = "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc"}, + {file = "urllib3-1.26.13.tar.gz", hash = "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8"}, ] vt-py = [ {file = "vt-py-0.17.3.tar.gz", hash = "sha256:2f96fe86c7213dda9e45ab06bf18f7843f9513c1a073b1606fe238ea624a5b32"}, @@ -2774,6 +2774,6 @@ yarl = [ {file = "yarl-1.8.1.tar.gz", hash = "sha256:af887845b8c2e060eb5605ff72b6f2dd2aab7a761379373fd89d314f4752abbf"}, ] zipp = [ - {file = "zipp-3.10.0-py3-none-any.whl", hash = "sha256:4fcb6f278987a6605757302a6e40e896257570d11c51628968ccb2a47e80c6c1"}, - {file = "zipp-3.10.0.tar.gz", hash = "sha256:7a7262fd930bd3e36c50b9a64897aec3fafff3dfdeec9623ae22b40e93f99bb8"}, + {file = "zipp-3.11.0-py3-none-any.whl", hash = "sha256:83a28fcb75844b5c0cdaf5aa4003c2d728c77e05f5aeabe8e95e56727005fbaa"}, + {file = "zipp-3.11.0.tar.gz", hash = "sha256:a7a22e05929290a67401440b39690ae6563279bced5f314609d9d03798f56766"}, ]