fix: Avoid stashing the web interface when lacus becomes unavailable

pull/917/head
Raphaël Vinot 2024-05-17 17:29:11 +02:00
parent 8fb2e2e695
commit 2baa2cd73a
5 changed files with 77 additions and 55 deletions

View File

@ -14,6 +14,7 @@ from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResp
from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
from lookyloo import Lookyloo, CaptureSettings
from lookyloo.exceptions import LacusUnreachable
from lookyloo.default import AbstractManager, get_config
from lookyloo.helpers import get_captures_dir
@ -31,7 +32,6 @@ class AsyncCapture(AbstractManager):
self.capture_dir: Path = get_captures_dir()
self.lookyloo = Lookyloo()
if isinstance(self.lookyloo.lacus, LacusCore):
self.captures: set[asyncio.Task] = set() # type: ignore[type-arg]
self.fox = FOX(config_name='FOX')
@ -135,6 +135,7 @@ class AsyncCapture(AbstractManager):
if self.force_stop:
return None
try:
if isinstance(self.lookyloo.lacus, LacusCore):
await self._trigger_captures()
# NOTE: +1 because running this method also counts for one and will
@ -142,8 +143,11 @@ class AsyncCapture(AbstractManager):
self.set_running(len(self.captures) + 1)
self.process_capture_queue()
except LacusUnreachable:
self.logger.error('Lacus is unreachable, retrying later.')
async def _wait_to_finish_async(self) -> None:
try:
if isinstance(self.lookyloo.lacus, LacusCore):
while self.captures:
self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...')
@ -153,6 +157,8 @@ class AsyncCapture(AbstractManager):
self.process_capture_queue()
self.unset_running()
self.logger.info('No more captures')
except LacusUnreachable:
self.logger.error('Lacus is unreachable, nothing to wait for')
def main() -> None:

View File

@ -12,6 +12,7 @@ from typing import Any
from lacuscore import CaptureStatus as CaptureStatusCore
from lookyloo import Lookyloo
from lookyloo.exceptions import LacusUnreachable
from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
from lookyloo.helpers import ParsedUserAgent, serialize_to_json
from pylacus import CaptureStatus as CaptureStatusPy
@ -78,6 +79,7 @@ class Processing(AbstractManager):
def _retry_failed_enqueue(self) -> None:
'''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID'''
to_requeue: list[str] = []
try:
for uuid, _ in self.lookyloo.redis.zscan_iter('to_capture'):
if self.lookyloo.redis.hexists(uuid, 'not_queued'):
# The capture is marked as not queued
@ -97,6 +99,9 @@ class Processing(AbstractManager):
# UUID is still unknown
self.logger.info(f'UUID {uuid} is still unknown')
to_requeue.append(uuid)
except LacusUnreachable:
self.logger.warning('Lacus still unreachable, trying again later')
return None
for uuid in to_requeue:
if self.lookyloo.redis.zscore('to_capture', uuid) is None:
@ -130,6 +135,9 @@ class Processing(AbstractManager):
if new_uuid != uuid:
# somehow, between the check and queuing, the UUID isn't UNKNOWN anymore, just checking that
self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}')
except LacusUnreachable:
self.logger.warning('Lacus still unreachable.')
break
except Exception as e:
self.logger.warning(f'Still unable to enqueue capture: {e}')
break

View File

@ -21,3 +21,7 @@ class TreeNeedsRebuild(LookylooException):
class ModuleError(LookylooException):
pass
class LacusUnreachable(LookylooException):
pass

View File

@ -54,7 +54,7 @@ from .capturecache import CaptureCache, CapturesIndex
from .context import Context
from .default import LookylooException, get_homedir, get_config, get_socket_path, safe_create_dir
from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
MissingUUID, TreeNeedsRebuild, NoValidHarFile, LacusUnreachable)
from .helpers import (get_captures_dir, get_email_template,
get_resources_hashes, get_taxonomies,
uniq_domains, ParsedUserAgent, load_cookies, UserAgents,
@ -177,9 +177,6 @@ class Lookyloo():
self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size)
self.logger.info('Index initialized.')
# init lacus
self.lacus
@property
def redis(self) -> Redis: # type: ignore[type-arg]
return Redis(connection_pool=self.redis_pool)
@ -192,7 +189,7 @@ class Lookyloo():
remote_lacus_config = get_config('generic', 'remote_lacus')
if remote_lacus_config.get('enable'):
self.logger.info("Remote lacus enabled, trying to set it up...")
lacus_retries = 10
lacus_retries = 2
while lacus_retries > 0:
remote_lacus_url = remote_lacus_config.get('url')
self._lacus = PyLacus(remote_lacus_url)
@ -202,9 +199,9 @@ class Lookyloo():
break
lacus_retries -= 1
self.logger.warning(f"Unable to setup remote lacus to {remote_lacus_url}, trying again {lacus_retries} more time(s).")
time.sleep(10)
time.sleep(3)
else:
raise LookylooException('Remote lacus is enabled but unreachable.')
raise LacusUnreachable('Remote lacus is enabled but unreachable.')
if not has_remote_lacus:
# We need a redis connector that doesn't decode.
@ -544,6 +541,9 @@ class Lookyloo():
return CaptureStatusCore.ONGOING
try:
lacus_status = self.lacus.get_capture_status(capture_uuid)
except LacusUnreachable as e:
self.logger.warning(f'Unable to connect to lacus: {e}')
raise e
except Exception as e:
self.logger.warning(f'Unable to get the status for {capture_uuid} from lacus: {e}')
if self.redis.zscore('to_capture', capture_uuid) is not None:

View File

@ -41,7 +41,7 @@ from werkzeug.wrappers.response import Response as WerkzeugResponse
from lookyloo import Lookyloo, CaptureSettings
from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.exceptions import MissingUUID, NoValidHarFile, LacusUnreachable
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
if sys.version_info < (3, 9):
@ -1084,6 +1084,7 @@ def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | Werkzeu
if tree_uuid == 'False':
flash("Unable to process your request.", 'warning')
return redirect(url_for('index'))
try:
cache = lookyloo.capture_cache(tree_uuid, force_update=True)
if not cache:
status = lookyloo.get_capture_status(tree_uuid)
@ -1097,6 +1098,9 @@ def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | Werkzeu
# the request for a status. Give it an extra few seconds.
message = "The capture is ongoing."
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
except LacusUnreachable:
message = "Unable to connect to the Lacus backend, the capture will start as soon as the administrator wakes up."
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
try:
ct = lookyloo.get_crawled_tree(tree_uuid)