chg: Improve logging.

pull/254/head
Raphaël Vinot 2021-09-01 14:08:25 +02:00
parent 25d5beaaaf
commit c09adec333
5 changed files with 19 additions and 20 deletions

View File

@ -59,10 +59,11 @@ class AsyncCapture(AbstractManager):
if 'cookies' in to_capture: if 'cookies' in to_capture:
to_capture['cookies_pseudofile'] = to_capture.pop('cookies') to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
self.logger.info(f'Capturing {to_capture["url"]} - {uuid}')
if self._capture(**to_capture): # type: ignore if self._capture(**to_capture): # type: ignore
self.logger.info(f'Processed {to_capture["url"]}') self.logger.info(f'Successfully captured {to_capture["url"]} - {uuid}')
else: else:
self.logger.warning(f'Unable to capture {to_capture["url"]}') self.logger.warning(f'Unable to capture {to_capture["url"]} - {uuid}')
lazy_cleanup.srem('ongoing', uuid) lazy_cleanup.srem('ongoing', uuid)
lazy_cleanup.delete(uuid) lazy_cleanup.delete(uuid)
# make sure to expire the key if nothing was processed for a while (= queues empty) # make sure to expire the key if nothing was processed for a while (= queues empty)

View File

@ -16,7 +16,6 @@ from functools import lru_cache
from enum import IntEnum, unique from enum import IntEnum, unique
from har2tree import CrawledTree, HostNode, URLNode from har2tree import CrawledTree, HostNode, URLNode
from redis import Redis
import requests import requests
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from publicsuffix2 import PublicSuffixList, fetch # type: ignore from publicsuffix2 import PublicSuffixList, fetch # type: ignore
@ -264,17 +263,6 @@ def get_useragent_for_requests():
return f'Lookyloo / {version}' return f'Lookyloo / {version}'
def get_capture_status(capture_uuid: str, /) -> CaptureStatus:
    """Return the current status of a capture, looked up in the 'cache' Redis.

    A new Redis client is opened on the unix socket returned by
    ``get_socket_path('cache')`` and queried in a fixed order; the first
    match wins:

    1. ``capture_uuid`` has a rank in the ``to_capture`` sorted set -> ``QUEUED``
    2. ``capture_uuid`` is a field of the ``lookup_dirs`` hash -> ``DONE``
    3. ``capture_uuid`` is a member of the ``ongoing`` set -> ``ONGOING``

    If none of the lookups match, ``CaptureStatus.UNKNOWN`` is returned.
    """
    cache = Redis(unix_socket_path=get_socket_path('cache'))

    # zrank returns None when the member is absent; rank 0 is a valid hit,
    # hence the explicit comparison against None.
    if cache.zrank('to_capture', capture_uuid) is not None:
        return CaptureStatus.QUEUED
    if cache.hexists('lookup_dirs', capture_uuid):
        return CaptureStatus.DONE
    if cache.sismember('ongoing', capture_uuid):
        return CaptureStatus.ONGOING
    return CaptureStatus.UNKNOWN
@lru_cache(64) @lru_cache(64)
def get_splash_url() -> str: def get_splash_url() -> str:
if os.environ.get('SPLASH_URL_DOCKER'): if os.environ.get('SPLASH_URL_DOCKER'):

View File

@ -31,7 +31,7 @@ from werkzeug.useragents import UserAgent
from .exceptions import NoValidHarFile, MissingUUID, LookylooException, MissingCaptureDirectory from .exceptions import NoValidHarFile, MissingUUID, LookylooException, MissingCaptureDirectory
from .helpers import (get_homedir, get_socket_path, get_config, get_email_template, load_pickle_tree, from .helpers import (get_homedir, get_socket_path, get_config, get_email_template, load_pickle_tree,
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains, remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
try_make_file, get_captures_dir, get_splash_url) try_make_file, get_captures_dir, get_splash_url, CaptureStatus)
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois, UrlScan from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois, UrlScan
from .capturecache import CaptureCache from .capturecache import CaptureCache
from .context import Context from .context import Context
@ -563,6 +563,15 @@ class Lookyloo():
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True) all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
return all_cache return all_cache
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
    """Return the current status of a capture, using this instance's Redis client.

    ``self.redis`` is queried in a fixed order; the first match wins:

    1. ``capture_uuid`` has a rank in the ``to_capture`` sorted set -> ``QUEUED``
    2. ``capture_uuid`` is a field of the ``lookup_dirs`` hash -> ``DONE``
    3. ``capture_uuid`` is a member of the ``ongoing`` set -> ``ONGOING``

    If none of the lookups match, ``CaptureStatus.UNKNOWN`` is returned.
    """
    cache = self.redis

    # zrank returns None when the member is absent; rank 0 is a valid hit,
    # hence the explicit comparison against None.
    if cache.zrank('to_capture', capture_uuid) is not None:
        return CaptureStatus.QUEUED
    if cache.hexists('lookup_dirs', capture_uuid):
        return CaptureStatus.DONE
    if cache.sismember('ongoing', capture_uuid):
        return CaptureStatus.ONGOING
    return CaptureStatus.UNKNOWN
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]: def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
"""Get the cache from redis.""" """Get the cache from redis."""
if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects: if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects:
@ -570,7 +579,8 @@ class Lookyloo():
try: try:
capture_dir = self._get_capture_dir(capture_uuid) capture_dir = self._get_capture_dir(capture_uuid)
except LookylooException: except LookylooException:
self.logger.warning(f'Unable to find {capture_uuid} (not in the cache and/or missing capture directory).') if self.get_capture_status(capture_uuid) not in [CaptureStatus.QUEUED, CaptureStatus.ONGOING]:
self.logger.warning(f'Unable to find {capture_uuid} (not in the cache and/or missing capture directory).')
return None return None
cached = self.redis.hgetall(str(capture_dir)) cached = self.redis.hgetall(str(capture_dir))

View File

@ -22,7 +22,7 @@ from werkzeug.security import check_password_hash
from pymisp import MISPEvent, MISPServerError from pymisp import MISPEvent, MISPServerError
from lookyloo.helpers import (get_user_agents, get_config, get_taxonomies, load_cookies, from lookyloo.helpers import (get_user_agents, get_config, get_taxonomies, load_cookies,
CaptureStatus, splash_status, get_capture_status) CaptureStatus, splash_status)
from lookyloo.lookyloo import Lookyloo, Indexing from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID from lookyloo.exceptions import NoValidHarFile, MissingUUID
@ -556,7 +556,7 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
return redirect(url_for('index')) return redirect(url_for('index'))
cache = lookyloo.capture_cache(tree_uuid) cache = lookyloo.capture_cache(tree_uuid)
if not cache: if not cache:
status = get_capture_status(tree_uuid) status = lookyloo.get_capture_status(tree_uuid)
splash_up, splash_message = splash_status() splash_up, splash_message = splash_status()
if not splash_up: if not splash_up:
flash(f'The capture module is not reachable ({splash_message}).', 'error') flash(f'The capture module is not reachable ({splash_message}).', 'error')

View File

@ -13,7 +13,7 @@ from werkzeug.security import check_password_hash
from lookyloo.lookyloo import Lookyloo from lookyloo.lookyloo import Lookyloo
from .helpers import (src_request_ip, load_user_from_request, build_users_table) from .helpers import (src_request_ip, load_user_from_request, build_users_table)
from lookyloo.helpers import splash_status, get_capture_status from lookyloo.helpers import splash_status
api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/') api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/')
@ -71,7 +71,7 @@ class SplashStatus(Resource):
params={'capture_uuid': 'The UUID of the capture'}) params={'capture_uuid': 'The UUID of the capture'})
class CaptureStatusQuery(Resource): class CaptureStatusQuery(Resource):
def get(self, capture_uuid: str): def get(self, capture_uuid: str):
return {'status_code': get_capture_status(capture_uuid)} return {'status_code': lookyloo.get_capture_status(capture_uuid)}
@api.route('/json/<string:capture_uuid>/hostnames') @api.route('/json/<string:capture_uuid>/hostnames')