mirror of https://github.com/CIRCL/lookyloo
chg: Improve logging.
parent
25d5beaaaf
commit
c09adec333
|
@ -59,10 +59,11 @@ class AsyncCapture(AbstractManager):
|
|||
if 'cookies' in to_capture:
|
||||
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
||||
|
||||
self.logger.info(f'Capturing {to_capture["url"]} - {uuid}')
|
||||
if self._capture(**to_capture): # type: ignore
|
||||
self.logger.info(f'Processed {to_capture["url"]}')
|
||||
self.logger.info(f'Successfully captured {to_capture["url"]} - {uuid}')
|
||||
else:
|
||||
self.logger.warning(f'Unable to capture {to_capture["url"]}')
|
||||
self.logger.warning(f'Unable to capture {to_capture["url"]} - {uuid}')
|
||||
lazy_cleanup.srem('ongoing', uuid)
|
||||
lazy_cleanup.delete(uuid)
|
||||
# make sure to expire the key if nothing was processed for a while (= queues empty)
|
||||
|
|
|
@ -16,7 +16,6 @@ from functools import lru_cache
|
|||
from enum import IntEnum, unique
|
||||
|
||||
from har2tree import CrawledTree, HostNode, URLNode
|
||||
from redis import Redis
|
||||
import requests
|
||||
from requests.exceptions import HTTPError
|
||||
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
|
||||
|
@ -264,17 +263,6 @@ def get_useragent_for_requests():
|
|||
return f'Lookyloo / {version}'
|
||||
|
||||
|
||||
def get_capture_status(capture_uuid: str, /) -> CaptureStatus:
|
||||
r = Redis(unix_socket_path=get_socket_path('cache'))
|
||||
if r.zrank('to_capture', capture_uuid) is not None:
|
||||
return CaptureStatus.QUEUED
|
||||
elif r.hexists('lookup_dirs', capture_uuid):
|
||||
return CaptureStatus.DONE
|
||||
elif r.sismember('ongoing', capture_uuid):
|
||||
return CaptureStatus.ONGOING
|
||||
return CaptureStatus.UNKNOWN
|
||||
|
||||
|
||||
@lru_cache(64)
|
||||
def get_splash_url() -> str:
|
||||
if os.environ.get('SPLASH_URL_DOCKER'):
|
||||
|
|
|
@ -31,7 +31,7 @@ from werkzeug.useragents import UserAgent
|
|||
from .exceptions import NoValidHarFile, MissingUUID, LookylooException, MissingCaptureDirectory
|
||||
from .helpers import (get_homedir, get_socket_path, get_config, get_email_template, load_pickle_tree,
|
||||
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
|
||||
try_make_file, get_captures_dir, get_splash_url)
|
||||
try_make_file, get_captures_dir, get_splash_url, CaptureStatus)
|
||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois, UrlScan
|
||||
from .capturecache import CaptureCache
|
||||
from .context import Context
|
||||
|
@ -563,6 +563,15 @@ class Lookyloo():
|
|||
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
|
||||
return all_cache
|
||||
|
||||
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
|
||||
if self.redis.zrank('to_capture', capture_uuid) is not None:
|
||||
return CaptureStatus.QUEUED
|
||||
elif self.redis.hexists('lookup_dirs', capture_uuid):
|
||||
return CaptureStatus.DONE
|
||||
elif self.redis.sismember('ongoing', capture_uuid):
|
||||
return CaptureStatus.ONGOING
|
||||
return CaptureStatus.UNKNOWN
|
||||
|
||||
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
|
||||
"""Get the cache from redis."""
|
||||
if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects:
|
||||
|
@ -570,7 +579,8 @@ class Lookyloo():
|
|||
try:
|
||||
capture_dir = self._get_capture_dir(capture_uuid)
|
||||
except LookylooException:
|
||||
self.logger.warning(f'Unable to find {capture_uuid} (not in the cache and/or missing capture directory).')
|
||||
if self.get_capture_status(capture_uuid) not in [CaptureStatus.QUEUED, CaptureStatus.ONGOING]:
|
||||
self.logger.warning(f'Unable to find {capture_uuid} (not in the cache and/or missing capture directory).')
|
||||
return None
|
||||
|
||||
cached = self.redis.hgetall(str(capture_dir))
|
||||
|
|
|
@ -22,7 +22,7 @@ from werkzeug.security import check_password_hash
|
|||
from pymisp import MISPEvent, MISPServerError
|
||||
|
||||
from lookyloo.helpers import (get_user_agents, get_config, get_taxonomies, load_cookies,
|
||||
CaptureStatus, splash_status, get_capture_status)
|
||||
CaptureStatus, splash_status)
|
||||
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
||||
|
||||
|
@ -556,7 +556,7 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
|
|||
return redirect(url_for('index'))
|
||||
cache = lookyloo.capture_cache(tree_uuid)
|
||||
if not cache:
|
||||
status = get_capture_status(tree_uuid)
|
||||
status = lookyloo.get_capture_status(tree_uuid)
|
||||
splash_up, splash_message = splash_status()
|
||||
if not splash_up:
|
||||
flash(f'The capture module is not reachable ({splash_message}).', 'error')
|
||||
|
|
|
@ -13,7 +13,7 @@ from werkzeug.security import check_password_hash
|
|||
from lookyloo.lookyloo import Lookyloo
|
||||
|
||||
from .helpers import (src_request_ip, load_user_from_request, build_users_table)
|
||||
from lookyloo.helpers import splash_status, get_capture_status
|
||||
from lookyloo.helpers import splash_status
|
||||
|
||||
api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/')
|
||||
|
||||
|
@ -71,7 +71,7 @@ class SplashStatus(Resource):
|
|||
params={'capture_uuid': 'The UUID of the capture'})
|
||||
class CaptureStatusQuery(Resource):
|
||||
def get(self, capture_uuid: str):
|
||||
return {'status_code': get_capture_status(capture_uuid)}
|
||||
return {'status_code': lookyloo.get_capture_status(capture_uuid)}
|
||||
|
||||
|
||||
@api.route('/json/<string:capture_uuid>/hostnames')
|
||||
|
|
Loading…
Reference in New Issue