mirror of https://github.com/CIRCL/lookyloo
chg: Improve logging.
parent
25d5beaaaf
commit
c09adec333
|
@ -59,10 +59,11 @@ class AsyncCapture(AbstractManager):
|
||||||
if 'cookies' in to_capture:
|
if 'cookies' in to_capture:
|
||||||
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
||||||
|
|
||||||
|
self.logger.info(f'Capturing {to_capture["url"]} - {uuid}')
|
||||||
if self._capture(**to_capture): # type: ignore
|
if self._capture(**to_capture): # type: ignore
|
||||||
self.logger.info(f'Processed {to_capture["url"]}')
|
self.logger.info(f'Successfully captured {to_capture["url"]} - {uuid}')
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f'Unable to capture {to_capture["url"]}')
|
self.logger.warning(f'Unable to capture {to_capture["url"]} - {uuid}')
|
||||||
lazy_cleanup.srem('ongoing', uuid)
|
lazy_cleanup.srem('ongoing', uuid)
|
||||||
lazy_cleanup.delete(uuid)
|
lazy_cleanup.delete(uuid)
|
||||||
# make sure to expire the key if nothing was processed for a while (= queues empty)
|
# make sure to expire the key if nothing was processed for a while (= queues empty)
|
||||||
|
|
|
@ -16,7 +16,6 @@ from functools import lru_cache
|
||||||
from enum import IntEnum, unique
|
from enum import IntEnum, unique
|
||||||
|
|
||||||
from har2tree import CrawledTree, HostNode, URLNode
|
from har2tree import CrawledTree, HostNode, URLNode
|
||||||
from redis import Redis
|
|
||||||
import requests
|
import requests
|
||||||
from requests.exceptions import HTTPError
|
from requests.exceptions import HTTPError
|
||||||
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
|
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
|
||||||
|
@ -264,17 +263,6 @@ def get_useragent_for_requests():
|
||||||
return f'Lookyloo / {version}'
|
return f'Lookyloo / {version}'
|
||||||
|
|
||||||
|
|
||||||
def get_capture_status(capture_uuid: str, /) -> CaptureStatus:
|
|
||||||
r = Redis(unix_socket_path=get_socket_path('cache'))
|
|
||||||
if r.zrank('to_capture', capture_uuid) is not None:
|
|
||||||
return CaptureStatus.QUEUED
|
|
||||||
elif r.hexists('lookup_dirs', capture_uuid):
|
|
||||||
return CaptureStatus.DONE
|
|
||||||
elif r.sismember('ongoing', capture_uuid):
|
|
||||||
return CaptureStatus.ONGOING
|
|
||||||
return CaptureStatus.UNKNOWN
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(64)
|
@lru_cache(64)
|
||||||
def get_splash_url() -> str:
|
def get_splash_url() -> str:
|
||||||
if os.environ.get('SPLASH_URL_DOCKER'):
|
if os.environ.get('SPLASH_URL_DOCKER'):
|
||||||
|
|
|
@ -31,7 +31,7 @@ from werkzeug.useragents import UserAgent
|
||||||
from .exceptions import NoValidHarFile, MissingUUID, LookylooException, MissingCaptureDirectory
|
from .exceptions import NoValidHarFile, MissingUUID, LookylooException, MissingCaptureDirectory
|
||||||
from .helpers import (get_homedir, get_socket_path, get_config, get_email_template, load_pickle_tree,
|
from .helpers import (get_homedir, get_socket_path, get_config, get_email_template, load_pickle_tree,
|
||||||
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
|
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
|
||||||
try_make_file, get_captures_dir, get_splash_url)
|
try_make_file, get_captures_dir, get_splash_url, CaptureStatus)
|
||||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois, UrlScan
|
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois, UrlScan
|
||||||
from .capturecache import CaptureCache
|
from .capturecache import CaptureCache
|
||||||
from .context import Context
|
from .context import Context
|
||||||
|
@ -563,6 +563,15 @@ class Lookyloo():
|
||||||
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
|
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
|
||||||
return all_cache
|
return all_cache
|
||||||
|
|
||||||
|
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
|
||||||
|
if self.redis.zrank('to_capture', capture_uuid) is not None:
|
||||||
|
return CaptureStatus.QUEUED
|
||||||
|
elif self.redis.hexists('lookup_dirs', capture_uuid):
|
||||||
|
return CaptureStatus.DONE
|
||||||
|
elif self.redis.sismember('ongoing', capture_uuid):
|
||||||
|
return CaptureStatus.ONGOING
|
||||||
|
return CaptureStatus.UNKNOWN
|
||||||
|
|
||||||
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
|
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
|
||||||
"""Get the cache from redis."""
|
"""Get the cache from redis."""
|
||||||
if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects:
|
if capture_uuid in self._captures_index and not self._captures_index[capture_uuid].incomplete_redirects:
|
||||||
|
@ -570,6 +579,7 @@ class Lookyloo():
|
||||||
try:
|
try:
|
||||||
capture_dir = self._get_capture_dir(capture_uuid)
|
capture_dir = self._get_capture_dir(capture_uuid)
|
||||||
except LookylooException:
|
except LookylooException:
|
||||||
|
if self.get_capture_status(capture_uuid) not in [CaptureStatus.QUEUED, CaptureStatus.ONGOING]:
|
||||||
self.logger.warning(f'Unable to find {capture_uuid} (not in the cache and/or missing capture directory).')
|
self.logger.warning(f'Unable to find {capture_uuid} (not in the cache and/or missing capture directory).')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ from werkzeug.security import check_password_hash
|
||||||
from pymisp import MISPEvent, MISPServerError
|
from pymisp import MISPEvent, MISPServerError
|
||||||
|
|
||||||
from lookyloo.helpers import (get_user_agents, get_config, get_taxonomies, load_cookies,
|
from lookyloo.helpers import (get_user_agents, get_config, get_taxonomies, load_cookies,
|
||||||
CaptureStatus, splash_status, get_capture_status)
|
CaptureStatus, splash_status)
|
||||||
from lookyloo.lookyloo import Lookyloo, Indexing
|
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||||
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
||||||
|
|
||||||
|
@ -556,7 +556,7 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
|
||||||
return redirect(url_for('index'))
|
return redirect(url_for('index'))
|
||||||
cache = lookyloo.capture_cache(tree_uuid)
|
cache = lookyloo.capture_cache(tree_uuid)
|
||||||
if not cache:
|
if not cache:
|
||||||
status = get_capture_status(tree_uuid)
|
status = lookyloo.get_capture_status(tree_uuid)
|
||||||
splash_up, splash_message = splash_status()
|
splash_up, splash_message = splash_status()
|
||||||
if not splash_up:
|
if not splash_up:
|
||||||
flash(f'The capture module is not reachable ({splash_message}).', 'error')
|
flash(f'The capture module is not reachable ({splash_message}).', 'error')
|
||||||
|
|
|
@ -13,7 +13,7 @@ from werkzeug.security import check_password_hash
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo
|
||||||
|
|
||||||
from .helpers import (src_request_ip, load_user_from_request, build_users_table)
|
from .helpers import (src_request_ip, load_user_from_request, build_users_table)
|
||||||
from lookyloo.helpers import splash_status, get_capture_status
|
from lookyloo.helpers import splash_status
|
||||||
|
|
||||||
api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/')
|
api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/')
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ class SplashStatus(Resource):
|
||||||
params={'capture_uuid': 'The UUID of the capture'})
|
params={'capture_uuid': 'The UUID of the capture'})
|
||||||
class CaptureStatusQuery(Resource):
|
class CaptureStatusQuery(Resource):
|
||||||
def get(self, capture_uuid: str):
|
def get(self, capture_uuid: str):
|
||||||
return {'status_code': get_capture_status(capture_uuid)}
|
return {'status_code': lookyloo.get_capture_status(capture_uuid)}
|
||||||
|
|
||||||
|
|
||||||
@api.route('/json/<string:capture_uuid>/hostnames')
|
@api.route('/json/<string:capture_uuid>/hostnames')
|
||||||
|
|
Loading…
Reference in New Issue