From b6ea1c24db079b7cf31d35f7f6644e1e936e84fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Wed, 31 Mar 2021 19:25:57 +0200
Subject: [PATCH] new: Query capture status

---
Reviewer notes (text between the three-dash line and the first file diff is
not part of the commit and may be deleted):
  * This patch was recovered from a copy whose newlines and indentation had
    been collapsed. Hunk line counts and the diffstat were re-checked, but the
    leading whitespace of context/removed lines is inferred from Python
    syntax - TODO confirm against the target tree (or apply with
    `git apply --ignore-whitespace`).
  * The URL variable of the two @app.route rules had been stripped
    ('/json//status', '/json//redirects'). It is restored to match the
    handlers' `tree_uuid` parameter; the `string:` converter is assumed -
    TODO confirm.
  * The From: header carries no e-mail address (presumably stripped as well);
    `git am` may reject the patch until it is restored.
  * Review observation, not changed here: if self.capture() raises, the uuid
    is never removed from the 'ongoing' set.

 lookyloo/helpers.py     |  9 +++++++++
 lookyloo/lookyloo.py    | 22 +++++++++++++++++++---
 website/web/__init__.py |  5 +++++
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py
index 37729174..93130573 100644
--- a/lookyloo/helpers.py
+++ b/lookyloo/helpers.py
@@ -13,6 +13,7 @@ from datetime import datetime, timedelta
 from glob import glob
 from urllib.parse import urlparse
 from functools import lru_cache
+from enum import IntEnum, unique
 
 from har2tree import CrawledTree, HostNode, URLNode
 from redis import Redis
@@ -33,6 +34,14 @@ configs: Dict[str, Dict[str, Any]] = {}
 logger = logging.getLogger('Lookyloo - Helpers')
 
 
+@unique
+class CaptureStatus(IntEnum):
+    UNKNOWN = -1
+    QUEUED = 0
+    DONE = 1
+    ONGOING = 2
+
+
 # This method is used in json.dump or json.dumps calls as the default parameter:
 # json.dumps(..., default=dump_to_json)
 def serialize_to_json(obj: Union[Set]) -> Union[List]:
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index 1d840cf5..5c5ffa2f 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -35,7 +35,8 @@ from werkzeug.useragents import UserAgent
 from .exceptions import NoValidHarFile, MissingUUID, LookylooException
 from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
                       safe_create_dir, get_email_template, load_pickle_tree,
-                      remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains)
+                      remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
+                      CaptureStatus)
 from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP
 from .capturecache import CaptureCache
 from .context import Context
@@ -546,6 +547,15 @@ class Lookyloo():
             raise NoValidHarFile(f'UUID ({capture_uuid}) linked to a missing directory ({capture_dir}). Removed now.')
         return to_return
 
+    def get_capture_status(self, capture_uuid: str) -> CaptureStatus:
+        if self.redis.sismember('to_capture', capture_uuid):
+            return CaptureStatus.QUEUED
+        elif self.redis.hexists('lookup_dirs', capture_uuid):
+            return CaptureStatus.DONE
+        elif self.redis.sismember('ongoing', capture_uuid):
+            return CaptureStatus.ONGOING
+        return CaptureStatus.UNKNOWN
+
     def enqueue_capture(self, query: MutableMapping[str, Any]) -> str:
         '''Enqueue a query in the capture queue (used by the API for asynchronous processing)'''
         perma_uuid = str(uuid4())
@@ -566,14 +576,20 @@ class Lookyloo():
         uuid = self.redis.spop('to_capture')
         if not uuid:
             return None
+        self.redis.sadd('ongoing', uuid)
+
         to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
-        self.redis.delete(uuid)
         to_capture['perma_uuid'] = uuid
         if 'cookies' in to_capture:
             to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
-        if self.capture(**to_capture):  # type: ignore
+
+        status = self.capture(**to_capture)  # type: ignore
+        self.redis.srem('ongoing', uuid)
+        self.redis.delete(uuid)
+        if status:
             self.logger.info(f'Processed {to_capture["url"]}')
             return True
+        self.logger.warning(f'Unable to capture {to_capture["url"]}')
         return False
 
     def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
diff --git a/website/web/__init__.py b/website/web/__init__.py
index e2169429..6c674150 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -925,6 +925,11 @@ def json_get_token():
     return jsonify({'error': 'User/Password invalid.'})
 
 
+@app.route('/json/<string:tree_uuid>/status', methods=['GET'])
+def get_capture_status(tree_uuid: str):
+    return jsonify({'status_code': lookyloo.get_capture_status(tree_uuid)})
+
+
 @app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
 def json_redirects(tree_uuid: str):
     cache = lookyloo.capture_cache(tree_uuid)