fix: Avoid stashing the web interface when lacus becomes unavailable

pull/917/head
Raphaël Vinot 2024-05-17 17:29:11 +02:00
parent 8fb2e2e695
commit 2baa2cd73a
5 changed files with 77 additions and 55 deletions

View File

@ -14,6 +14,7 @@ from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResp
from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
from lookyloo import Lookyloo, CaptureSettings
from lookyloo.exceptions import LacusUnreachable
from lookyloo.default import AbstractManager, get_config
from lookyloo.helpers import get_captures_dir
@ -31,8 +32,7 @@ class AsyncCapture(AbstractManager):
self.capture_dir: Path = get_captures_dir()
self.lookyloo = Lookyloo()
if isinstance(self.lookyloo.lacus, LacusCore):
self.captures: set[asyncio.Task] = set() # type: ignore[type-arg]
self.captures: set[asyncio.Task] = set() # type: ignore[type-arg]
self.fox = FOX(config_name='FOX')
if not self.fox.available:
@ -135,24 +135,30 @@ class AsyncCapture(AbstractManager):
if self.force_stop:
return None
if isinstance(self.lookyloo.lacus, LacusCore):
await self._trigger_captures()
# NOTE: +1 because running this method also counts for one and will
# be decremented when it finishes
self.set_running(len(self.captures) + 1)
try:
if isinstance(self.lookyloo.lacus, LacusCore):
await self._trigger_captures()
# NOTE: +1 because running this method also counts for one and will
# be decremented when it finishes
self.set_running(len(self.captures) + 1)
self.process_capture_queue()
self.process_capture_queue()
except LacusUnreachable:
self.logger.error('Lacus is unreachable, retrying later.')
async def _wait_to_finish_async(self) -> None:
if isinstance(self.lookyloo.lacus, LacusCore):
while self.captures:
self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...')
await asyncio.sleep(5)
# NOTE: +1 so we don't quit before the final process capture queue
self.set_running(len(self.captures) + 1)
self.process_capture_queue()
self.unset_running()
self.logger.info('No more captures')
try:
if isinstance(self.lookyloo.lacus, LacusCore):
while self.captures:
self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...')
await asyncio.sleep(5)
# NOTE: +1 so we don't quit before the final process capture queue
self.set_running(len(self.captures) + 1)
self.process_capture_queue()
self.unset_running()
self.logger.info('No more captures')
except LacusUnreachable:
self.logger.error('Lacus is unreachable, nothing to wait for')
def main() -> None:

View File

@ -12,6 +12,7 @@ from typing import Any
from lacuscore import CaptureStatus as CaptureStatusCore
from lookyloo import Lookyloo
from lookyloo.exceptions import LacusUnreachable
from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
from lookyloo.helpers import ParsedUserAgent, serialize_to_json
from pylacus import CaptureStatus as CaptureStatusPy
@ -78,25 +79,29 @@ class Processing(AbstractManager):
def _retry_failed_enqueue(self) -> None:
'''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID'''
to_requeue: list[str] = []
for uuid, _ in self.lookyloo.redis.zscan_iter('to_capture'):
if self.lookyloo.redis.hexists(uuid, 'not_queued'):
# The capture is marked as not queued
to_requeue.append(uuid)
elif self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
# The capture is unknown on lacus side. It might be a race condition.
# Let's retry a few times.
retry = 3
while retry > 0:
time.sleep(1)
if self.lookyloo.lacus.get_capture_status(uuid) not in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
# Was a race condition, the UUID has been or is being processed by Lacus
self.logger.info(f'UUID {uuid} was only temporary unknown')
break
retry -= 1
else:
# UUID is still unknown
self.logger.info(f'UUID {uuid} is still unknown')
try:
for uuid, _ in self.lookyloo.redis.zscan_iter('to_capture'):
if self.lookyloo.redis.hexists(uuid, 'not_queued'):
# The capture is marked as not queued
to_requeue.append(uuid)
elif self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
# The capture is unknown on lacus side. It might be a race condition.
# Let's retry a few times.
retry = 3
while retry > 0:
time.sleep(1)
if self.lookyloo.lacus.get_capture_status(uuid) not in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
# Was a race condition, the UUID has been or is being processed by Lacus
self.logger.info(f'UUID {uuid} was only temporary unknown')
break
retry -= 1
else:
# UUID is still unknown
self.logger.info(f'UUID {uuid} is still unknown')
to_requeue.append(uuid)
except LacusUnreachable:
self.logger.warning('Lacus still unreachable, trying again later')
return None
for uuid in to_requeue:
if self.lookyloo.redis.zscore('to_capture', uuid) is None:
@ -130,6 +135,9 @@ class Processing(AbstractManager):
if new_uuid != uuid:
# somehow, between the check and queuing, the UUID isn't UNKNOWN anymore, just checking that
self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}')
except LacusUnreachable:
self.logger.warning('Lacus still unreachable.')
break
except Exception as e:
self.logger.warning(f'Still unable to enqueue capture: {e}')
break

View File

@ -21,3 +21,7 @@ class TreeNeedsRebuild(LookylooException):
class ModuleError(LookylooException):
pass
class LacusUnreachable(LookylooException):
pass

View File

@ -54,7 +54,7 @@ from .capturecache import CaptureCache, CapturesIndex
from .context import Context
from .default import LookylooException, get_homedir, get_config, get_socket_path, safe_create_dir
from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
MissingUUID, TreeNeedsRebuild, NoValidHarFile, LacusUnreachable)
from .helpers import (get_captures_dir, get_email_template,
get_resources_hashes, get_taxonomies,
uniq_domains, ParsedUserAgent, load_cookies, UserAgents,
@ -177,9 +177,6 @@ class Lookyloo():
self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size)
self.logger.info('Index initialized.')
# init lacus
self.lacus
@property
def redis(self) -> Redis: # type: ignore[type-arg]
return Redis(connection_pool=self.redis_pool)
@ -192,7 +189,7 @@ class Lookyloo():
remote_lacus_config = get_config('generic', 'remote_lacus')
if remote_lacus_config.get('enable'):
self.logger.info("Remote lacus enabled, trying to set it up...")
lacus_retries = 10
lacus_retries = 2
while lacus_retries > 0:
remote_lacus_url = remote_lacus_config.get('url')
self._lacus = PyLacus(remote_lacus_url)
@ -202,9 +199,9 @@ class Lookyloo():
break
lacus_retries -= 1
self.logger.warning(f"Unable to setup remote lacus to {remote_lacus_url}, trying again {lacus_retries} more time(s).")
time.sleep(10)
time.sleep(3)
else:
raise LookylooException('Remote lacus is enabled but unreachable.')
raise LacusUnreachable('Remote lacus is enabled but unreachable.')
if not has_remote_lacus:
# We need a redis connector that doesn't decode.
@ -544,6 +541,9 @@ class Lookyloo():
return CaptureStatusCore.ONGOING
try:
lacus_status = self.lacus.get_capture_status(capture_uuid)
except LacusUnreachable as e:
self.logger.warning(f'Unable to connect to lacus: {e}')
raise e
except Exception as e:
self.logger.warning(f'Unable to get the status for {capture_uuid} from lacus: {e}')
if self.redis.zscore('to_capture', capture_uuid) is not None:

View File

@ -41,7 +41,7 @@ from werkzeug.wrappers.response import Response as WerkzeugResponse
from lookyloo import Lookyloo, CaptureSettings
from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.exceptions import MissingUUID, NoValidHarFile, LacusUnreachable
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
if sys.version_info < (3, 9):
@ -1084,18 +1084,22 @@ def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | Werkzeu
if tree_uuid == 'False':
flash("Unable to process your request.", 'warning')
return redirect(url_for('index'))
cache = lookyloo.capture_cache(tree_uuid, force_update=True)
if not cache:
status = lookyloo.get_capture_status(tree_uuid)
if status == CaptureStatus.UNKNOWN:
flash(f'Unable to find this UUID ({tree_uuid}).', 'warning')
return index_generic()
elif status == CaptureStatus.QUEUED:
message = "The capture is queued, but didn't start yet."
elif status in [CaptureStatus.ONGOING, CaptureStatus.DONE]:
# If CaptureStatus.DONE, the capture finished between the query to the cache and
# the request for a status. Give it an extra few seconds.
message = "The capture is ongoing."
try:
cache = lookyloo.capture_cache(tree_uuid, force_update=True)
if not cache:
status = lookyloo.get_capture_status(tree_uuid)
if status == CaptureStatus.UNKNOWN:
flash(f'Unable to find this UUID ({tree_uuid}).', 'warning')
return index_generic()
elif status == CaptureStatus.QUEUED:
message = "The capture is queued, but didn't start yet."
elif status in [CaptureStatus.ONGOING, CaptureStatus.DONE]:
# If CaptureStatus.DONE, the capture finished between the query to the cache and
# the request for a status. Give it an extra few seconds.
message = "The capture is ongoing."
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
except LacusUnreachable:
message = "Unable to connect to the Lacus backend, the capture will start as soon as the administrator wakes up."
return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
try: