fix: Avoid stashing the web interface when lacus becomes unavailable

pull/920/head
Raphaël Vinot 2024-05-17 17:29:11 +02:00
parent e2a8121898
commit 0a973be5dd
5 changed files with 77 additions and 55 deletions

View File

@ -14,6 +14,7 @@ from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResp
from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
from lookyloo import Lookyloo, CaptureSettings from lookyloo import Lookyloo, CaptureSettings
from lookyloo.exceptions import LacusUnreachable
from lookyloo.default import AbstractManager, get_config from lookyloo.default import AbstractManager, get_config
from lookyloo.helpers import get_captures_dir from lookyloo.helpers import get_captures_dir
@ -31,7 +32,6 @@ class AsyncCapture(AbstractManager):
self.capture_dir: Path = get_captures_dir() self.capture_dir: Path = get_captures_dir()
self.lookyloo = Lookyloo() self.lookyloo = Lookyloo()
if isinstance(self.lookyloo.lacus, LacusCore):
self.captures: set[asyncio.Task] = set() # type: ignore[type-arg] self.captures: set[asyncio.Task] = set() # type: ignore[type-arg]
self.fox = FOX(config_name='FOX') self.fox = FOX(config_name='FOX')
@ -135,6 +135,7 @@ class AsyncCapture(AbstractManager):
if self.force_stop: if self.force_stop:
return None return None
try:
if isinstance(self.lookyloo.lacus, LacusCore): if isinstance(self.lookyloo.lacus, LacusCore):
await self._trigger_captures() await self._trigger_captures()
# NOTE: +1 because running this method also counts for one and will # NOTE: +1 because running this method also counts for one and will
@ -142,8 +143,11 @@ class AsyncCapture(AbstractManager):
self.set_running(len(self.captures) + 1) self.set_running(len(self.captures) + 1)
self.process_capture_queue() self.process_capture_queue()
except LacusUnreachable:
self.logger.error('Lacus is unreachable, retrying later.')
async def _wait_to_finish_async(self) -> None: async def _wait_to_finish_async(self) -> None:
try:
if isinstance(self.lookyloo.lacus, LacusCore): if isinstance(self.lookyloo.lacus, LacusCore):
while self.captures: while self.captures:
self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...') self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...')
@ -153,6 +157,8 @@ class AsyncCapture(AbstractManager):
self.process_capture_queue() self.process_capture_queue()
self.unset_running() self.unset_running()
self.logger.info('No more captures') self.logger.info('No more captures')
except LacusUnreachable:
self.logger.error('Lacus is unreachable, nothing to wait for')
def main() -> None: def main() -> None:

View File

@ -12,6 +12,7 @@ from typing import Any
from lacuscore import CaptureStatus as CaptureStatusCore from lacuscore import CaptureStatus as CaptureStatusCore
from lookyloo import Lookyloo from lookyloo import Lookyloo
from lookyloo.exceptions import LacusUnreachable
from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
from lookyloo.helpers import ParsedUserAgent, serialize_to_json from lookyloo.helpers import ParsedUserAgent, serialize_to_json
from pylacus import CaptureStatus as CaptureStatusPy from pylacus import CaptureStatus as CaptureStatusPy
@ -78,6 +79,7 @@ class Processing(AbstractManager):
def _retry_failed_enqueue(self) -> None: def _retry_failed_enqueue(self) -> None:
'''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID''' '''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID'''
to_requeue: list[str] = [] to_requeue: list[str] = []
try:
for uuid, _ in self.lookyloo.redis.zscan_iter('to_capture'): for uuid, _ in self.lookyloo.redis.zscan_iter('to_capture'):
if self.lookyloo.redis.hexists(uuid, 'not_queued'): if self.lookyloo.redis.hexists(uuid, 'not_queued'):
# The capture is marked as not queued # The capture is marked as not queued
@ -97,6 +99,9 @@ class Processing(AbstractManager):
# UUID is still unknown # UUID is still unknown
self.logger.info(f'UUID {uuid} is still unknown') self.logger.info(f'UUID {uuid} is still unknown')
to_requeue.append(uuid) to_requeue.append(uuid)
except LacusUnreachable:
self.logger.warning('Lacus still unreachable, trying again later')
return None
for uuid in to_requeue: for uuid in to_requeue:
if self.lookyloo.redis.zscore('to_capture', uuid) is None: if self.lookyloo.redis.zscore('to_capture', uuid) is None:
@ -130,6 +135,9 @@ class Processing(AbstractManager):
if new_uuid != uuid: if new_uuid != uuid:
# somehow, between the check and queuing, the UUID isn't UNKNOWN anymore, just checking that # somehow, between the check and queuing, the UUID isn't UNKNOWN anymore, just checking that
self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}') self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}')
except LacusUnreachable:
self.logger.warning('Lacus still unreachable.')
break
except Exception as e: except Exception as e:
self.logger.warning(f'Still unable to enqueue capture: {e}') self.logger.warning(f'Still unable to enqueue capture: {e}')
break break

View File

@ -21,3 +21,7 @@ class TreeNeedsRebuild(LookylooException):
class ModuleError(LookylooException): class ModuleError(LookylooException):
pass pass
class LacusUnreachable(LookylooException):
    """Signal that the Lacus capture backend could not be reached.

    Raised while setting up the remote Lacus connection when remote Lacus
    is enabled in the configuration but every connection attempt failed.
    Callers catch it to log the outage and retry later instead of failing.
    """

View File

@ -54,7 +54,7 @@ from .capturecache import CaptureCache, CapturesIndex
from .context import Context from .context import Context
from .default import LookylooException, get_homedir, get_config, get_socket_path, safe_create_dir from .default import LookylooException, get_homedir, get_config, get_socket_path, safe_create_dir
from .exceptions import (MissingCaptureDirectory, from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile) MissingUUID, TreeNeedsRebuild, NoValidHarFile, LacusUnreachable)
from .helpers import (get_captures_dir, get_email_template, from .helpers import (get_captures_dir, get_email_template,
get_resources_hashes, get_taxonomies, get_resources_hashes, get_taxonomies,
uniq_domains, ParsedUserAgent, load_cookies, UserAgents, uniq_domains, ParsedUserAgent, load_cookies, UserAgents,
@ -177,9 +177,6 @@ class Lookyloo():
self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size) self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size)
self.logger.info('Index initialized.') self.logger.info('Index initialized.')
# init lacus
self.lacus
@property @property
def redis(self) -> Redis: # type: ignore[type-arg] def redis(self) -> Redis: # type: ignore[type-arg]
return Redis(connection_pool=self.redis_pool) return Redis(connection_pool=self.redis_pool)
@ -192,7 +189,7 @@ class Lookyloo():
remote_lacus_config = get_config('generic', 'remote_lacus') remote_lacus_config = get_config('generic', 'remote_lacus')
if remote_lacus_config.get('enable'): if remote_lacus_config.get('enable'):
self.logger.info("Remote lacus enabled, trying to set it up...") self.logger.info("Remote lacus enabled, trying to set it up...")
lacus_retries = 10 lacus_retries = 2
while lacus_retries > 0: while lacus_retries > 0:
remote_lacus_url = remote_lacus_config.get('url') remote_lacus_url = remote_lacus_config.get('url')
self._lacus = PyLacus(remote_lacus_url) self._lacus = PyLacus(remote_lacus_url)
@ -202,9 +199,9 @@ class Lookyloo():
break break
lacus_retries -= 1 lacus_retries -= 1
self.logger.warning(f"Unable to setup remote lacus to {remote_lacus_url}, trying again {lacus_retries} more time(s).") self.logger.warning(f"Unable to setup remote lacus to {remote_lacus_url}, trying again {lacus_retries} more time(s).")
time.sleep(10) time.sleep(3)
else: else:
raise LookylooException('Remote lacus is enabled but unreachable.') raise LacusUnreachable('Remote lacus is enabled but unreachable.')
if not has_remote_lacus: if not has_remote_lacus:
# We need a redis connector that doesn't decode. # We need a redis connector that doesn't decode.
@ -544,6 +541,9 @@ class Lookyloo():
return CaptureStatusCore.ONGOING return CaptureStatusCore.ONGOING
try: try:
lacus_status = self.lacus.get_capture_status(capture_uuid) lacus_status = self.lacus.get_capture_status(capture_uuid)
except LacusUnreachable as e:
self.logger.warning(f'Unable to connect to lacus: {e}')
raise e
except Exception as e: except Exception as e:
self.logger.warning(f'Unable to get the status for {capture_uuid} from lacus: {e}') self.logger.warning(f'Unable to get the status for {capture_uuid} from lacus: {e}')
if self.redis.zscore('to_capture', capture_uuid) is not None: if self.redis.zscore('to_capture', capture_uuid) is not None:

View File

@ -41,7 +41,7 @@ from werkzeug.wrappers.response import Response as WerkzeugResponse
from lookyloo import Lookyloo, CaptureSettings from lookyloo import Lookyloo, CaptureSettings
from lookyloo.default import get_config from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.exceptions import MissingUUID, NoValidHarFile, LacusUnreachable
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
if sys.version_info < (3, 9): if sys.version_info < (3, 9):
@ -1084,6 +1084,7 @@ def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | Werkzeu
if tree_uuid == 'False': if tree_uuid == 'False':
flash("Unable to process your request.", 'warning') flash("Unable to process your request.", 'warning')
return redirect(url_for('index')) return redirect(url_for('index'))
try:
cache = lookyloo.capture_cache(tree_uuid, force_update=True) cache = lookyloo.capture_cache(tree_uuid, force_update=True)
if not cache: if not cache:
status = lookyloo.get_capture_status(tree_uuid) status = lookyloo.get_capture_status(tree_uuid)
@ -1097,6 +1098,9 @@ def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | Werkzeu
# the request for a status. Give it an extra few seconds. # the request for a status. Give it an extra few seconds.
message = "The capture is ongoing." message = "The capture is ongoing."
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid) return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
except LacusUnreachable:
message = "Unable to connect to the Lacus backend, the capture will start as soon as the administrator wakes up."
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
try: try:
ct = lookyloo.get_crawled_tree(tree_uuid) ct = lookyloo.get_crawled_tree(tree_uuid)