chg: Improve error handling, especially for broken CaptureSettings

pull/926/head
Raphaël Vinot 2024-07-22 13:14:21 +02:00
parent dad0917e4c
commit d5fb385add
5 changed files with 96 additions and 47 deletions

View File

@ -56,7 +56,7 @@ class AsyncCapture(AbstractManager):
'''Get the list of captures ready to be processed''' '''Get the list of captures ready to be processed'''
# Only check if the top 50 in the priority list are done, as they are the most likely ones to be # Only check if the top 50 in the priority list are done, as they are the most likely ones to be
# and if the list is very, very long, iterating over it takes a very long time. # and if the list is very, very long, iterating over it takes a very long time.
return [uuid for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf', start=0, num=50) return [uuid for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf', start=0, num=500)
if uuid and self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.DONE, CaptureStatusCore.DONE]] if uuid and self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.DONE, CaptureStatusCore.DONE]]
def process_capture_queue(self) -> None: def process_capture_queue(self) -> None:

View File

@ -10,7 +10,7 @@ from collections import Counter
from datetime import date, timedelta from datetime import date, timedelta
from typing import Any from typing import Any
from lacuscore import CaptureStatus as CaptureStatusCore from lacuscore import CaptureStatus as CaptureStatusCore, CaptureSettingsError
from lookyloo import Lookyloo from lookyloo import Lookyloo
from lookyloo.exceptions import LacusUnreachable from lookyloo.exceptions import LacusUnreachable
from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
@ -109,6 +109,7 @@ class Processing(AbstractManager):
continue continue
self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.') self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.')
# This capture couldn't be queued and we created the uuid locally # This capture couldn't be queued and we created the uuid locally
try:
if query := self.lookyloo.get_capture_settings(uuid): if query := self.lookyloo.get_capture_settings(uuid):
try: try:
new_uuid = self.lookyloo.lacus.enqueue( new_uuid = self.lookyloo.lacus.enqueue(
@ -144,6 +145,13 @@ class Processing(AbstractManager):
else: else:
self.lookyloo.redis.hdel(uuid, 'not_queued') self.lookyloo.redis.hdel(uuid, 'not_queued')
self.logger.info(f'{uuid} enqueued.') self.logger.info(f'{uuid} enqueued.')
except CaptureSettingsError as e:
self.logger.error(f'Broken settings for {uuid} made their way in the cache, removing them: {e}')
self.lookyloo.redis.zrem('to_capture', uuid)
self.lookyloo.redis.delete(uuid)
except Exception as e:
self.logger.error(f'Unable to requeue {uuid}: {e}')
def main() -> None: def main() -> None:

View File

@ -29,7 +29,7 @@ import mmh3
from defang import defang # type: ignore[import-untyped] from defang import defang # type: ignore[import-untyped]
from har2tree import CrawledTree, HostNode, URLNode from har2tree import CrawledTree, HostNode, URLNode
from lacuscore import (LacusCore, from lacuscore import (LacusCore, CaptureSettingsError,
CaptureStatus as CaptureStatusCore, CaptureStatus as CaptureStatusCore,
# CaptureResponse as CaptureResponseCore) # CaptureResponse as CaptureResponseCore)
# CaptureResponseJson as CaptureResponseJsonCore, # CaptureResponseJson as CaptureResponseJsonCore,
@ -287,15 +287,25 @@ class Lookyloo():
return meta return meta
def get_capture_settings(self, capture_uuid: str, /) -> CaptureSettings | None:
    '''Load the settings of a capture, from redis first, then from the capture directory.

    Returns None when no settings are found in either place, or when building
    the CaptureSettings raises CaptureSettingsError (a warning is logged).
    '''
    # First attempt: the settings stored in redis under the capture UUID.
    try:
        cached_settings = self.redis.hgetall(capture_uuid)
        if cached_settings:
            return CaptureSettings(**cached_settings)
    except CaptureSettingsError as e:
        self.logger.warning(f'Invalid capture settings for {capture_uuid}: {e}')
        return None

    # Second attempt: the capture_settings.json file in the capture directory.
    cache = self.capture_cache(capture_uuid)
    if not cache:
        return None
    cs_file = cache.capture_dir / 'capture_settings.json'
    if not cs_file.exists():
        return None
    try:
        with cs_file.open('r') as f:
            return CaptureSettings(**json.load(f))
    except CaptureSettingsError as e:
        self.logger.warning(f'[In file!] Invalid capture settings for {capture_uuid}: {e}')
        return None
def categories_capture(self, capture_uuid: str, /) -> dict[str, Any]: def categories_capture(self, capture_uuid: str, /) -> dict[str, Any]:
@ -650,7 +660,11 @@ class Lookyloo():
query.headers['dnt'] = query.dnt query.headers['dnt'] = query.dnt
if authenticated: if authenticated:
if user_config := load_user_config(user): if user_config := load_user_config(user):
try:
query = self._apply_user_config(query, user_config) query = self._apply_user_config(query, user_config)
except CaptureSettingsError as e:
self.logger.critical(f'Unable to apply user config for {user}: {e}')
raise e
priority = get_priority(source, user, authenticated) priority = get_priority(source, user, authenticated)
if priority < -100: if priority < -100:
@ -714,7 +728,11 @@ class Lookyloo():
if to_return['contacts']: if to_return['contacts']:
to_return['all_emails'] |= set(to_return['contacts']) to_return['all_emails'] |= set(to_return['contacts'])
to_return['ips'] = {ip: self.uwhois.whois(ip, contact_email_only=True) for ip in set(hostnode.resolved_ips['v4']) | set(hostnode.resolved_ips['v6'])} to_return['ips'] = {ip: self.uwhois.whois(ip, contact_email_only=True) for ip in set(hostnode.resolved_ips['v4']) | set(hostnode.resolved_ips['v6'])}
if hasattr(hostnode, 'ipasn'):
to_return['asns'] = {asn['asn']: self.uwhois.whois(f'AS{asn["asn"]}', contact_email_only=True) for asn in hostnode.ipasn.values()} to_return['asns'] = {asn['asn']: self.uwhois.whois(f'AS{asn["asn"]}', contact_email_only=True) for asn in hostnode.ipasn.values()}
else:
self.logger.warning(f'No IPASN for {hostnode.name}')
to_return['asns'] = {}
# try to get contact from security.txt file # try to get contact from security.txt file
try: try:
@ -1429,7 +1447,11 @@ class Lookyloo():
error = lookyloo_capture.read(filename).decode() error = lookyloo_capture.read(filename).decode()
elif filename.endswith('capture_settings.json'): elif filename.endswith('capture_settings.json'):
_capture_settings = json.loads(lookyloo_capture.read(filename)) _capture_settings = json.loads(lookyloo_capture.read(filename))
try:
capture_settings = CaptureSettings(**_capture_settings) capture_settings = CaptureSettings(**_capture_settings)
except CaptureSettingsError as e:
unrecoverable_error = True
messages['errors'].append(f'Invalid Capture Settings: {e}')
else: else:
for to_skip in files_to_skip: for to_skip in files_to_skip:
if filename.endswith(to_skip): if filename.endswith(to_skip):

View File

@ -32,7 +32,7 @@ from flask import (Flask, Response, Request, flash, jsonify, redirect, render_te
from flask_bootstrap import Bootstrap5 # type: ignore[import-untyped] from flask_bootstrap import Bootstrap5 # type: ignore[import-untyped]
from flask_cors import CORS # type: ignore[import-untyped] from flask_cors import CORS # type: ignore[import-untyped]
from flask_restx import Api # type: ignore[import-untyped] from flask_restx import Api # type: ignore[import-untyped]
from lacuscore import CaptureStatus from lacuscore import CaptureStatus, CaptureSettingsError
from puremagic import from_string from puremagic import from_string
from pymisp import MISPEvent, MISPServerError # type: ignore[attr-defined] from pymisp import MISPEvent, MISPServerError # type: ignore[attr-defined]
from werkzeug.security import check_password_hash from werkzeug.security import check_password_hash
@ -283,6 +283,16 @@ def file_response(func): # type: ignore[no-untyped-def]
return wrapper return wrapper
@app.errorhandler(CaptureSettingsError)
def handle_pydandic_validation_exception(error: CaptureSettingsError) -> Response | str | WerkzeugResponse:
    '''Flash the capture settings error, then redirect to the landing page.

    The flashed message lists the pydantic validation errors when the
    exception carries them, and falls back to the exception text otherwise.
    '''
    if not error.pydantic_validation_errors:
        message = str(error)
    else:
        message = f'Unable to validate capture settings: {error.pydantic_validation_errors.errors()}'
    flash(message)
    return redirect(url_for('landing_page'))
# ##### Methods querying the indexes ##### # ##### Methods querying the indexes #####
def _get_body_hash_investigator(body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]: def _get_body_hash_investigator(body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]:

View File

@ -17,7 +17,7 @@ from flask import request, send_file, Response
from flask_restx import Namespace, Resource, fields, abort # type: ignore[import-untyped] from flask_restx import Namespace, Resource, fields, abort # type: ignore[import-untyped]
from werkzeug.security import check_password_hash from werkzeug.security import check_password_hash
from lacuscore import CaptureStatus as CaptureStatusCore from lacuscore import CaptureStatus as CaptureStatusCore, CaptureSettingsError
from pylacus import CaptureStatus as CaptureStatusPy from pylacus import CaptureStatus as CaptureStatusPy
from lookyloo import CaptureSettings, Lookyloo from lookyloo import CaptureSettings, Lookyloo
from lookyloo.comparator import Comparator from lookyloo.comparator import Comparator
@ -51,6 +51,15 @@ def handle_no_HAR_file_exception(error: Any) -> tuple[dict[str, str], int]:
return {'message': str(error)}, 400 return {'message': str(error)}, 400
@api.errorhandler(CaptureSettingsError)  # type: ignore[misc]
def handle_pydandic_validation_exception(error: CaptureSettingsError) -> tuple[dict[str, Any], int]:
    '''Build the 400 response body for a CaptureSettingsError.

    When the exception carries pydantic validation errors, they are returned
    under the 'details' key; otherwise only the exception text is returned.
    '''
    validation_errors = error.pydantic_validation_errors
    if not validation_errors:
        return {'message': str(error)}, 400
    body = {'message': 'Unable to validate capture settings.',
            'details': validation_errors.errors()}
    return body, 400
@api.route('/json/get_user_config') @api.route('/json/get_user_config')
@api.doc(description='Get the configuration of the user (if any)', security='apikey') @api.doc(description='Get the configuration of the user (if any)', security='apikey')
class UserConfig(Resource): # type: ignore[misc] class UserConfig(Resource): # type: ignore[misc]