mirror of https://github.com/CIRCL/lookyloo
new: Query capture status
parent
3035bec310
commit
b6ea1c24db
|
@ -13,6 +13,7 @@ from datetime import datetime, timedelta
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
from enum import IntEnum, unique
|
||||||
|
|
||||||
from har2tree import CrawledTree, HostNode, URLNode
|
from har2tree import CrawledTree, HostNode, URLNode
|
||||||
from redis import Redis
|
from redis import Redis
|
||||||
|
@ -33,6 +34,14 @@ configs: Dict[str, Dict[str, Any]] = {}
|
||||||
logger = logging.getLogger('Lookyloo - Helpers')
|
logger = logging.getLogger('Lookyloo - Helpers')
|
||||||
|
|
||||||
|
|
||||||
|
@unique
class CaptureStatus(IntEnum):
    '''Integer status codes describing where a capture currently stands.

    The members are plain integers (IntEnum), and @unique guarantees
    that no two members share the same value.
    '''

    UNKNOWN = -1  # no other status applies
    QUEUED = 0    # waiting to be captured
    DONE = 1      # capture finished
    ONGOING = 2   # capture in progress
|
||||||
|
|
||||||
|
|
||||||
# This function is used in json.dump or json.dumps calls as the default parameter:
|
# This function is used in json.dump or json.dumps calls as the default parameter:
|
||||||
# json.dumps(..., default=serialize_to_json)
|
# json.dumps(..., default=serialize_to_json)
|
||||||
def serialize_to_json(obj: Union[Set]) -> Union[List]:
|
def serialize_to_json(obj: Union[Set]) -> Union[List]:
|
||||||
|
|
|
@ -35,7 +35,8 @@ from werkzeug.useragents import UserAgent
|
||||||
from .exceptions import NoValidHarFile, MissingUUID, LookylooException
|
from .exceptions import NoValidHarFile, MissingUUID, LookylooException
|
||||||
from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
|
from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
|
||||||
safe_create_dir, get_email_template, load_pickle_tree,
|
safe_create_dir, get_email_template, load_pickle_tree,
|
||||||
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains)
|
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
|
||||||
|
CaptureStatus)
|
||||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP
|
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP
|
||||||
from .capturecache import CaptureCache
|
from .capturecache import CaptureCache
|
||||||
from .context import Context
|
from .context import Context
|
||||||
|
@ -546,6 +547,15 @@ class Lookyloo():
|
||||||
raise NoValidHarFile(f'UUID ({capture_uuid}) linked to a missing directory ({capture_dir}). Removed now.')
|
raise NoValidHarFile(f'UUID ({capture_uuid}) linked to a missing directory ({capture_dir}). Removed now.')
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
|
def get_capture_status(self, capture_uuid: str) -> CaptureStatus:
    '''Return the status of the capture identified by capture_uuid.

    The Redis keys are probed in a fixed order and the first match wins:
    member of the 'to_capture' set -> QUEUED, field of the 'lookup_dirs'
    hash -> DONE, member of the 'ongoing' set -> ONGOING.
    If none of them knows the UUID, UNKNOWN is returned.
    '''
    # (redis command, key, resulting status) -- order matters, first hit is returned.
    probes = (
        (self.redis.sismember, 'to_capture', CaptureStatus.QUEUED),
        (self.redis.hexists, 'lookup_dirs', CaptureStatus.DONE),
        (self.redis.sismember, 'ongoing', CaptureStatus.ONGOING),
    )
    for probe, key, status in probes:
        if probe(key, capture_uuid):
            return status
    return CaptureStatus.UNKNOWN
|
||||||
|
|
||||||
def enqueue_capture(self, query: MutableMapping[str, Any]) -> str:
|
def enqueue_capture(self, query: MutableMapping[str, Any]) -> str:
|
||||||
'''Enqueue a query in the capture queue (used by the API for asynchronous processing)'''
|
'''Enqueue a query in the capture queue (used by the API for asynchronous processing)'''
|
||||||
perma_uuid = str(uuid4())
|
perma_uuid = str(uuid4())
|
||||||
|
@ -566,14 +576,20 @@ class Lookyloo():
|
||||||
uuid = self.redis.spop('to_capture')
|
uuid = self.redis.spop('to_capture')
|
||||||
if not uuid:
|
if not uuid:
|
||||||
return None
|
return None
|
||||||
|
self.redis.sadd('ongoing', uuid)
|
||||||
|
|
||||||
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
|
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
|
||||||
self.redis.delete(uuid)
|
|
||||||
to_capture['perma_uuid'] = uuid
|
to_capture['perma_uuid'] = uuid
|
||||||
if 'cookies' in to_capture:
|
if 'cookies' in to_capture:
|
||||||
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
||||||
if self.capture(**to_capture): # type: ignore
|
|
||||||
|
status = self.capture(**to_capture) # type: ignore
|
||||||
|
self.redis.srem('ongoing', uuid)
|
||||||
|
self.redis.delete(uuid)
|
||||||
|
if status:
|
||||||
self.logger.info(f'Processed {to_capture["url"]}')
|
self.logger.info(f'Processed {to_capture["url"]}')
|
||||||
return True
|
return True
|
||||||
|
self.logger.warning(f'Unable to capture {to_capture["url"]}')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
|
def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
|
||||||
|
|
|
@ -925,6 +925,11 @@ def json_get_token():
|
||||||
return jsonify({'error': 'User/Password invalid.'})
|
return jsonify({'error': 'User/Password invalid.'})
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/json/<string:tree_uuid>/status', methods=['GET'])
def get_capture_status(tree_uuid: str):
    '''JSON endpoint: return the status of a capture under the 'status_code' key.'''
    status = lookyloo.get_capture_status(tree_uuid)
    return jsonify({'status_code': status})
|
||||||
|
|
||||||
|
|
||||||
@app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
|
@app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
|
||||||
def json_redirects(tree_uuid: str):
|
def json_redirects(tree_uuid: str):
|
||||||
cache = lookyloo.capture_cache(tree_uuid)
|
cache = lookyloo.capture_cache(tree_uuid)
|
||||||
|
|
Loading…
Reference in New Issue