chg: Improve logging.

pull/254/head
Raphaël Vinot 2021-09-01 14:08:25 +02:00
parent 25d5beaaaf
commit c09adec333
5 changed files with 19 additions and 20 deletions

View File

@ -59,10 +59,11 @@ class AsyncCapture(AbstractManager):
if 'cookies' in to_capture:
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
self.logger.info(f'Capturing {to_capture["url"]} - {uuid}')
if self._capture(**to_capture): # type: ignore
self.logger.info(f'Processed {to_capture["url"]}')
self.logger.info(f'Successfully captured {to_capture["url"]} - {uuid}')
else:
self.logger.warning(f'Unable to capture {to_capture["url"]}')
self.logger.warning(f'Unable to capture {to_capture["url"]} - {uuid}')
lazy_cleanup.srem('ongoing', uuid)
lazy_cleanup.delete(uuid)
# make sure to expire the key if nothing was processed for a while (= queues empty)

View File

@ -16,7 +16,6 @@ from functools import lru_cache
from enum import IntEnum, unique
from har2tree import CrawledTree, HostNode, URLNode
from redis import Redis
import requests
from requests.exceptions import HTTPError
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
@ -264,17 +263,6 @@ def get_useragent_for_requests():
return f'Lookyloo / {version}'
def get_capture_status(capture_uuid: str, /) -> CaptureStatus:
    """Report the current state of a capture by probing Redis.

    A new client is opened on the unix socket returned by
    ``get_socket_path('cache')``. The probes run in a fixed order and the
    first hit decides the answer:

    1. a rank in the ``to_capture`` sorted set -> ``CaptureStatus.QUEUED``
    2. a field in the ``lookup_dirs`` hash     -> ``CaptureStatus.DONE``
    3. membership in the ``ongoing`` set       -> ``CaptureStatus.ONGOING``

    If none of the probes match, ``CaptureStatus.UNKNOWN`` is returned.
    """
    cache = Redis(unix_socket_path=get_socket_path('cache'))

    # A rank of 0 is a legitimate hit, so compare against None explicitly
    # instead of relying on truthiness.
    if cache.zrank('to_capture', capture_uuid) is not None:
        return CaptureStatus.QUEUED
    if cache.hexists('lookup_dirs', capture_uuid):
        return CaptureStatus.DONE
    if cache.sismember('ongoing', capture_uuid):
        return CaptureStatus.ONGOING
    return CaptureStatus.UNKNOWN
@lru_cache(64)
def get_splash_url() -> str:
if os.environ.get('SPLASH_URL_DOCKER'):

View File

@ -31,7 +31,7 @@ from werkzeug.useragents import UserAgent
from .exceptions import NoValidHarFile, MissingUUID, LookylooException, MissingCaptureDirectory
from .helpers import (get_homedir, get_socket_path, get_config, get_email_template, load_pickle_tree,
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
try_make_file, get_captures_dir, get_splash_url)
try_make_file, get_captures_dir, get_splash_url, CaptureStatus)
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois, UrlScan
from .capturecache import CaptureCache
from .context import Context
@ -563,6 +563,15 @@ class Lookyloo():
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
return all_cache
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
    """Report the current state of a capture using this instance's Redis client.

    The checks are evaluated in order against ``self.redis`` and stop at the
    first match: present in the ``to_capture`` sorted set (QUEUED), present
    as a field of the ``lookup_dirs`` hash (DONE), member of the ``ongoing``
    set (ONGOING). When nothing matches the status is UNKNOWN.
    """
    # zrank yields None for a missing member; rank 0 is a valid position,
    # hence the explicit identity comparison.
    queue_rank = self.redis.zrank('to_capture', capture_uuid)
    if queue_rank is not None:
        return CaptureStatus.QUEUED

    if self.redis.hexists('lookup_dirs', capture_uuid):
        return CaptureStatus.DONE

    if self.redis.sismember('ongoing', capture_uuid):
        return CaptureStatus.ONGOING

    return CaptureStatus.UNKNOWN
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
"""Get the cache from redis."""
if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects:
@ -570,6 +579,7 @@ class Lookyloo():
try:
capture_dir = self._get_capture_dir(capture_uuid)
except LookylooException:
if self.get_capture_status(capture_uuid) not in [CaptureStatus.QUEUED, CaptureStatus.ONGOING]:
self.logger.warning(f'Unable to find {capture_uuid} (not in the cache and/or missing capture directory).')
return None

View File

@ -22,7 +22,7 @@ from werkzeug.security import check_password_hash
from pymisp import MISPEvent, MISPServerError
from lookyloo.helpers import (get_user_agents, get_config, get_taxonomies, load_cookies,
CaptureStatus, splash_status, get_capture_status)
CaptureStatus, splash_status)
from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID
@ -556,7 +556,7 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
return redirect(url_for('index'))
cache = lookyloo.capture_cache(tree_uuid)
if not cache:
status = get_capture_status(tree_uuid)
status = lookyloo.get_capture_status(tree_uuid)
splash_up, splash_message = splash_status()
if not splash_up:
flash(f'The capture module is not reachable ({splash_message}).', 'error')

View File

@ -13,7 +13,7 @@ from werkzeug.security import check_password_hash
from lookyloo.lookyloo import Lookyloo
from .helpers import (src_request_ip, load_user_from_request, build_users_table)
from lookyloo.helpers import splash_status, get_capture_status
from lookyloo.helpers import splash_status
api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/')
@ -71,7 +71,7 @@ class SplashStatus(Resource):
params={'capture_uuid': 'The UUID of the capture'})
class CaptureStatusQuery(Resource):
def get(self, capture_uuid: str):
return {'status_code': get_capture_status(capture_uuid)}
return {'status_code': lookyloo.get_capture_status(capture_uuid)}
@api.route('/json/<string:capture_uuid>/hostnames')