mirror of https://github.com/CIRCL/lookyloo
chg: Improve errors handling, especially for broken CaptureSettings
parent
dad0917e4c
commit
d5fb385add
|
@ -56,7 +56,7 @@ class AsyncCapture(AbstractManager):
|
|||
'''Get the list of captures ready to be processed'''
|
||||
# Only check if the top 50 in the priority list are done, as they are the most likely ones to be done,
|
||||
# and if the list is very very long, iterating over it takes a very long time.
|
||||
return [uuid for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf', start=0, num=50)
|
||||
return [uuid for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf', start=0, num=500)
|
||||
if uuid and self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.DONE, CaptureStatusCore.DONE]]
|
||||
|
||||
def process_capture_queue(self) -> None:
|
||||
|
|
|
@ -10,7 +10,7 @@ from collections import Counter
|
|||
from datetime import date, timedelta
|
||||
from typing import Any
|
||||
|
||||
from lacuscore import CaptureStatus as CaptureStatusCore
|
||||
from lacuscore import CaptureStatus as CaptureStatusCore, CaptureSettingsError
|
||||
from lookyloo import Lookyloo
|
||||
from lookyloo.exceptions import LacusUnreachable
|
||||
from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
|
||||
|
@ -109,41 +109,49 @@ class Processing(AbstractManager):
|
|||
continue
|
||||
self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.')
|
||||
# This capture couldn't be queued and we created the uuid locally
|
||||
if query := self.lookyloo.get_capture_settings(uuid):
|
||||
try:
|
||||
new_uuid = self.lookyloo.lacus.enqueue(
|
||||
url=query.url,
|
||||
document_name=query.document_name,
|
||||
document=query.document,
|
||||
# depth=query.depth,
|
||||
browser=query.browser,
|
||||
device_name=query.device_name,
|
||||
user_agent=query.user_agent,
|
||||
proxy=query.proxy,
|
||||
general_timeout_in_sec=query.general_timeout_in_sec,
|
||||
cookies=query.cookies,
|
||||
headers=query.headers,
|
||||
http_credentials=query.http_credentials,
|
||||
viewport=query.viewport,
|
||||
referer=query.referer,
|
||||
rendered_hostname_only=query.rendered_hostname_only,
|
||||
# force=query.force,
|
||||
# recapture_interval=query.recapture_interval,
|
||||
priority=query.priority,
|
||||
uuid=uuid
|
||||
)
|
||||
if new_uuid != uuid:
|
||||
# somehow, between the check and queuing, the UUID isn't UNKNOWN anymore, just checking that
|
||||
self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}')
|
||||
except LacusUnreachable:
|
||||
self.logger.warning('Lacus still unreachable.')
|
||||
break
|
||||
except Exception as e:
|
||||
self.logger.warning(f'Still unable to enqueue capture: {e}')
|
||||
break
|
||||
else:
|
||||
self.lookyloo.redis.hdel(uuid, 'not_queued')
|
||||
self.logger.info(f'{uuid} enqueued.')
|
||||
try:
|
||||
if query := self.lookyloo.get_capture_settings(uuid):
|
||||
try:
|
||||
new_uuid = self.lookyloo.lacus.enqueue(
|
||||
url=query.url,
|
||||
document_name=query.document_name,
|
||||
document=query.document,
|
||||
# depth=query.depth,
|
||||
browser=query.browser,
|
||||
device_name=query.device_name,
|
||||
user_agent=query.user_agent,
|
||||
proxy=query.proxy,
|
||||
general_timeout_in_sec=query.general_timeout_in_sec,
|
||||
cookies=query.cookies,
|
||||
headers=query.headers,
|
||||
http_credentials=query.http_credentials,
|
||||
viewport=query.viewport,
|
||||
referer=query.referer,
|
||||
rendered_hostname_only=query.rendered_hostname_only,
|
||||
# force=query.force,
|
||||
# recapture_interval=query.recapture_interval,
|
||||
priority=query.priority,
|
||||
uuid=uuid
|
||||
)
|
||||
if new_uuid != uuid:
|
||||
# somehow, between the check and queuing, the UUID isn't UNKNOWN anymore, just checking that
|
||||
self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}')
|
||||
except LacusUnreachable:
|
||||
self.logger.warning('Lacus still unreachable.')
|
||||
break
|
||||
except Exception as e:
|
||||
self.logger.warning(f'Still unable to enqueue capture: {e}')
|
||||
break
|
||||
else:
|
||||
self.lookyloo.redis.hdel(uuid, 'not_queued')
|
||||
self.logger.info(f'{uuid} enqueued.')
|
||||
except CaptureSettingsError as e:
|
||||
self.logger.error(f'Broken settings for {uuid} made their way in the cache, removing them: {e}')
|
||||
self.lookyloo.redis.zrem('to_capture', uuid)
|
||||
self.lookyloo.redis.delete(uuid)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f'Unable to requeue {uuid}: {e}')
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
|
|
@ -29,7 +29,7 @@ import mmh3
|
|||
|
||||
from defang import defang # type: ignore[import-untyped]
|
||||
from har2tree import CrawledTree, HostNode, URLNode
|
||||
from lacuscore import (LacusCore,
|
||||
from lacuscore import (LacusCore, CaptureSettingsError,
|
||||
CaptureStatus as CaptureStatusCore,
|
||||
# CaptureResponse as CaptureResponseCore)
|
||||
# CaptureResponseJson as CaptureResponseJsonCore,
|
||||
|
@ -287,15 +287,25 @@ class Lookyloo():
|
|||
return meta
|
||||
|
||||
def get_capture_settings(self, capture_uuid: str, /) -> CaptureSettings | None:
|
||||
if capture_settings := self.redis.hgetall(capture_uuid):
|
||||
return CaptureSettings(**capture_settings)
|
||||
'''Get the capture settings from the cache or the disk.'''
|
||||
try:
|
||||
if capture_settings := self.redis.hgetall(capture_uuid):
|
||||
return CaptureSettings(**capture_settings)
|
||||
except CaptureSettingsError as e:
|
||||
self.logger.warning(f'Invalid capture settings for {capture_uuid}: {e}')
|
||||
return None
|
||||
cache = self.capture_cache(capture_uuid)
|
||||
if not cache:
|
||||
return None
|
||||
cs_file = cache.capture_dir / 'capture_settings.json'
|
||||
if cs_file.exists():
|
||||
with cs_file.open('r') as f:
|
||||
return CaptureSettings(**json.load(f))
|
||||
try:
|
||||
with cs_file.open('r') as f:
|
||||
return CaptureSettings(**json.load(f))
|
||||
except CaptureSettingsError as e:
|
||||
self.logger.warning(f'[In file!] Invalid capture settings for {capture_uuid}: {e}')
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
def categories_capture(self, capture_uuid: str, /) -> dict[str, Any]:
|
||||
|
@ -650,7 +660,11 @@ class Lookyloo():
|
|||
query.headers['dnt'] = query.dnt
|
||||
if authenticated:
|
||||
if user_config := load_user_config(user):
|
||||
query = self._apply_user_config(query, user_config)
|
||||
try:
|
||||
query = self._apply_user_config(query, user_config)
|
||||
except CaptureSettingsError as e:
|
||||
self.logger.critical(f'Unable to apply user config for {user}: {e}')
|
||||
raise e
|
||||
|
||||
priority = get_priority(source, user, authenticated)
|
||||
if priority < -100:
|
||||
|
@ -714,7 +728,11 @@ class Lookyloo():
|
|||
if to_return['contacts']:
|
||||
to_return['all_emails'] |= set(to_return['contacts'])
|
||||
to_return['ips'] = {ip: self.uwhois.whois(ip, contact_email_only=True) for ip in set(hostnode.resolved_ips['v4']) | set(hostnode.resolved_ips['v6'])}
|
||||
to_return['asns'] = {asn['asn']: self.uwhois.whois(f'AS{asn["asn"]}', contact_email_only=True) for asn in hostnode.ipasn.values()}
|
||||
if hasattr(hostnode, 'ipasn'):
|
||||
to_return['asns'] = {asn['asn']: self.uwhois.whois(f'AS{asn["asn"]}', contact_email_only=True) for asn in hostnode.ipasn.values()}
|
||||
else:
|
||||
self.logger.warning(f'No IPASN for {hostnode.name}')
|
||||
to_return['asns'] = {}
|
||||
|
||||
# try to get contact from security.txt file
|
||||
try:
|
||||
|
@ -1429,7 +1447,11 @@ class Lookyloo():
|
|||
error = lookyloo_capture.read(filename).decode()
|
||||
elif filename.endswith('capture_settings.json'):
|
||||
_capture_settings = json.loads(lookyloo_capture.read(filename))
|
||||
capture_settings = CaptureSettings(**_capture_settings)
|
||||
try:
|
||||
capture_settings = CaptureSettings(**_capture_settings)
|
||||
except CaptureSettingsError as e:
|
||||
unrecoverable_error = True
|
||||
messages['errors'].append(f'Invalid Capture Settings: {e}')
|
||||
else:
|
||||
for to_skip in files_to_skip:
|
||||
if filename.endswith(to_skip):
|
||||
|
|
|
@ -32,7 +32,7 @@ from flask import (Flask, Response, Request, flash, jsonify, redirect, render_te
|
|||
from flask_bootstrap import Bootstrap5 # type: ignore[import-untyped]
|
||||
from flask_cors import CORS # type: ignore[import-untyped]
|
||||
from flask_restx import Api # type: ignore[import-untyped]
|
||||
from lacuscore import CaptureStatus
|
||||
from lacuscore import CaptureStatus, CaptureSettingsError
|
||||
from puremagic import from_string
|
||||
from pymisp import MISPEvent, MISPServerError # type: ignore[attr-defined]
|
||||
from werkzeug.security import check_password_hash
|
||||
|
@ -283,6 +283,16 @@ def file_response(func): # type: ignore[no-untyped-def]
|
|||
return wrapper
|
||||
|
||||
|
||||
@app.errorhandler(CaptureSettingsError)
def handle_pydandic_validation_exception(error: CaptureSettingsError) -> Response | str | WerkzeugResponse:
    '''Flash the capture settings error and redirect to the landing page.

    Registered as the application-wide handler for CaptureSettingsError.
    It does not answer with an error status itself: the message is queued
    with flash() and a redirect to the 'landing_page' endpoint is returned.

    :param error: The CaptureSettingsError that was raised while handling the request.
    :return: The redirect response to the landing page.
    '''
    validation_errors = error.pydantic_validation_errors
    if validation_errors:
        # Validation details are available, show them instead of the bare exception text.
        flash(f'Unable to validate capture settings: {validation_errors.errors()}')
    else:
        flash(str(error))
    return redirect(url_for('landing_page'))
|
||||
|
||||
|
||||
# ##### Methods querying the indexes #####
|
||||
|
||||
def _get_body_hash_investigator(body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]:
|
||||
|
|
|
@ -17,7 +17,7 @@ from flask import request, send_file, Response
|
|||
from flask_restx import Namespace, Resource, fields, abort # type: ignore[import-untyped]
|
||||
from werkzeug.security import check_password_hash
|
||||
|
||||
from lacuscore import CaptureStatus as CaptureStatusCore
|
||||
from lacuscore import CaptureStatus as CaptureStatusCore, CaptureSettingsError
|
||||
from pylacus import CaptureStatus as CaptureStatusPy
|
||||
from lookyloo import CaptureSettings, Lookyloo
|
||||
from lookyloo.comparator import Comparator
|
||||
|
@ -51,6 +51,15 @@ def handle_no_HAR_file_exception(error: Any) -> tuple[dict[str, str], int]:
|
|||
return {'message': str(error)}, 400
|
||||
|
||||
|
||||
@api.errorhandler(CaptureSettingsError)  # type: ignore[misc]
def handle_pydandic_validation_exception(error: CaptureSettingsError) -> tuple[dict[str, Any], int]:
    '''Build the JSON payload and 400 status code for a CaptureSettingsError.

    :param error: The CaptureSettingsError raised while handling the API call.
    :return: A (payload, 400) tuple. The payload always has a 'message' key,
             and a 'details' key when validation errors are attached to the exception.
    '''
    validation_errors = error.pydantic_validation_errors
    if not validation_errors:
        # No validation details attached: fall back on the exception text.
        return {'message': str(error)}, 400

    payload: dict[str, Any] = {
        'message': 'Unable to validate capture settings.',
        'details': validation_errors.errors(),
    }
    return payload, 400
|
||||
|
||||
|
||||
@api.route('/json/get_user_config')
|
||||
@api.doc(description='Get the configuration of the user (if any)', security='apikey')
|
||||
class UserConfig(Resource): # type: ignore[misc]
|
||||
|
|
Loading…
Reference in New Issue