mirror of https://github.com/CIRCL/lookyloo
Merge branch 'AntoniaBK-upload_capture'
commit
2a6ed52fef
|
@ -15,7 +15,7 @@ from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse a
|
||||||
|
|
||||||
from lookyloo import Lookyloo, CaptureSettings
|
from lookyloo import Lookyloo, CaptureSettings
|
||||||
from lookyloo.exceptions import LacusUnreachable
|
from lookyloo.exceptions import LacusUnreachable
|
||||||
from lookyloo.default import AbstractManager, get_config
|
from lookyloo.default import AbstractManager, get_config, LookylooException
|
||||||
from lookyloo.helpers import get_captures_dir
|
from lookyloo.helpers import get_captures_dir
|
||||||
|
|
||||||
from lookyloo.modules import FOX
|
from lookyloo.modules import FOX
|
||||||
|
@ -69,7 +69,7 @@ class AsyncCapture(AbstractManager):
|
||||||
elif isinstance(self.lookyloo.lacus, PyLacus):
|
elif isinstance(self.lookyloo.lacus, PyLacus):
|
||||||
entries = self.lookyloo.lacus.get_capture(uuid)
|
entries = self.lookyloo.lacus.get_capture(uuid)
|
||||||
else:
|
else:
|
||||||
raise Exception('Something is broken.')
|
raise LookylooException(f'lacus must be LacusCore or PyLacus, not {type(self.lookyloo.lacus)}.')
|
||||||
log = f'Got the capture for {uuid} from Lacus'
|
log = f'Got the capture for {uuid} from Lacus'
|
||||||
if runtime := entries.get('runtime'):
|
if runtime := entries.get('runtime'):
|
||||||
log = f'{log} - Runtime: {runtime}'
|
log = f'{log} - Runtime: {runtime}'
|
||||||
|
|
|
@ -254,6 +254,15 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
|
||||||
def lru_cache_clear(self) -> None:
|
def lru_cache_clear(self) -> None:
|
||||||
load_pickle_tree.cache_clear()
|
load_pickle_tree.cache_clear()
|
||||||
|
|
||||||
|
def uuid_exists(self, uuid: str) -> bool:
    '''Tell whether a capture UUID is already known.

    The in-memory cache is checked first, then the `lookup_dirs` and
    `lookup_dirs_archived` redis hashes, stopping at the first hit.'''
    if uuid in self.__cache:
        return True
    # any() short-circuits: the archived hash is only queried when the
    # UUID is not in the first one.
    return any(self.redis.hexists(lookup_key, uuid)
               for lookup_key in ('lookup_dirs', 'lookup_dirs_archived'))
|
||||||
|
|
||||||
def _quick_init(self) -> None:
|
def _quick_init(self) -> None:
|
||||||
'''Initialize the cache with a list of UUIDs, with less back and forth with redis.
|
'''Initialize the cache with a list of UUIDs, with less back and forth with redis.
|
||||||
Only get recent captures.'''
|
Only get recent captures.'''
|
||||||
|
|
|
@ -322,7 +322,7 @@ def is_locked(locked_dir_path: Path, /) -> bool:
|
||||||
max_wait_content = 5
|
max_wait_content = 5
|
||||||
while max_wait_content > 0:
|
while max_wait_content > 0:
|
||||||
with lock_file.open('r') as f:
|
with lock_file.open('r') as f:
|
||||||
if content := f.read():
|
if content := f.read().strip():
|
||||||
break
|
break
|
||||||
# The file is empty, we're between the creation and setting the content
|
# The file is empty, we're between the creation and setting the content
|
||||||
logger.info(f'Lock file empty ({lock_file}), waiting...')
|
logger.info(f'Lock file empty ({lock_file}), waiting...')
|
||||||
|
|
|
@ -1416,6 +1416,98 @@ class Lookyloo():
|
||||||
|
|
||||||
return statistics
|
return statistics
|
||||||
|
|
||||||
|
def unpack_full_capture_archive(self, archive: BytesIO, listing: bool) -> tuple[str, dict[str, list[str]]]:
    '''Unpack a capture export (zip archive) and store it as a capture.

    Entries are recognised by the end of their name. The HAR, the HTML,
    the landing page and the screenshot are mandatory: if any of them is
    missing (or empty), nothing is stored and one message per missing
    item is added to the errors.

    :param archive: The zip archive, as an in-memory file.
    :param listing: Whether the capture is public. Forced to False when
                    the archive contains a `no_index` entry.
    :return: The UUID of the capture, and the messages collected on the
             way, as `{'errors': [...], 'warnings': [...]}`. The capture
             was stored if and only if the errors list is empty.
    '''
    messages: dict[str, list[str]] = {'errors': [], 'warnings': []}
    # The archive is not guaranteed to contain a `uuid` entry: start with
    # a fresh UUID so the name is always bound when we store and return.
    uuid: str = str(uuid4())
    os: str | None = None
    browser: str | None = None
    parent: str | None = None
    downloaded_filename: str | None = None
    downloaded_file: bytes | None = None
    error: str | None = None
    har: dict[str, Any] | None = None
    screenshot: bytes | None = None
    html: str | None = None
    last_redirected_url: str | None = None
    cookies: list[Cookie] | list[dict[str, str]] | None = None
    capture_settings: CaptureSettings | None = None
    potential_favicons: set[bytes] = set()

    # Known entries we deliberately ignore, without warning the user.
    files_to_skip = ('cnames.json', 'ipasn.json', 'ips.json')

    with ZipFile(archive, 'r') as lookyloo_capture:
        for filename in lookyloo_capture.namelist():
            if filename.endswith('0.har.gz'):
                # new format
                har = json.loads(gzip.decompress(lookyloo_capture.read(filename)))
            elif filename.endswith('0.har'):
                # old format
                har = json.loads(lookyloo_capture.read(filename))
            elif filename.endswith('0.html'):
                html = lookyloo_capture.read(filename).decode()
            elif filename.endswith('0.last_redirect.txt'):
                last_redirected_url = lookyloo_capture.read(filename).decode()
            elif filename.endswith('0.png'):
                screenshot = lookyloo_capture.read(filename)
            elif filename.endswith('0.cookies.json'):
                # Not required
                cookies = json.loads(lookyloo_capture.read(filename))
            elif filename.endswith('potential_favicons.ico'):
                # We may have more than one favicon
                potential_favicons.add(lookyloo_capture.read(filename))
            elif filename.endswith('uuid'):
                uuid = lookyloo_capture.read(filename).decode()
                if self._captures_index.uuid_exists(uuid):
                    messages['warnings'].append(f'UUID {uuid} already exists, set a new one.')
                    uuid = str(uuid4())
            elif filename.endswith('meta'):
                meta = json.loads(lookyloo_capture.read(filename))
                if 'os' in meta:
                    os = meta['os']
                if 'browser' in meta:
                    browser = meta['browser']
            elif filename.endswith('no_index'):
                # Force it to false regardless the form
                listing = False
            elif filename.endswith('parent'):
                parent = lookyloo_capture.read(filename).decode()
            elif filename.endswith('0.data.filename'):
                # Must be tested before '0.data', which is a prefix of it.
                downloaded_filename = lookyloo_capture.read(filename).decode()
            elif filename.endswith('0.data'):
                downloaded_file = lookyloo_capture.read(filename)
            elif filename.endswith('error.txt'):
                error = lookyloo_capture.read(filename).decode()
            elif filename.endswith('capture_settings.json'):
                capture_settings = json.loads(lookyloo_capture.read(filename))
            elif not filename.endswith(files_to_skip):
                messages['warnings'].append(f'Unexpected file in the capture archive: {filename}')

    # If we don't have these 4 files, the archive is incomplete and we should not store it.
    if not har:
        messages['errors'].append('Invalid submission: missing HAR file')
    if not html:
        messages['errors'].append('Invalid submission: missing HTML file')
    if not last_redirected_url:
        messages['errors'].append('Invalid submission: missing landing page')
    if not screenshot:
        messages['errors'].append('Invalid submission: missing screenshot')

    if not messages['errors']:
        self.store_capture(uuid, is_public=listing,
                           os=os, browser=browser, parent=parent,
                           downloaded_filename=downloaded_filename, downloaded_file=downloaded_file,
                           error=error, har=har, png=screenshot, html=html,
                           last_redirected_url=last_redirected_url,
                           cookies=cookies,
                           capture_settings=capture_settings,
                           potential_favicons=potential_favicons)
    return uuid, messages
|
||||||
|
|
||||||
def store_capture(self, uuid: str, is_public: bool,
|
def store_capture(self, uuid: str, is_public: bool,
|
||||||
os: str | None=None, browser: str | None=None,
|
os: str | None=None, browser: str | None=None,
|
||||||
parent: str | None=None,
|
parent: str | None=None,
|
||||||
|
|
|
@ -5,7 +5,6 @@ from __future__ import annotations
|
||||||
import base64
|
import base64
|
||||||
import calendar
|
import calendar
|
||||||
import functools
|
import functools
|
||||||
import gzip
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import http
|
import http
|
||||||
import json
|
import json
|
||||||
|
@ -1457,12 +1456,12 @@ def submit_capture() -> str | Response | WerkzeugResponse:
|
||||||
|
|
||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
listing = True if request.form.get('listing') else False
|
listing = True if request.form.get('listing') else False
|
||||||
uuid = str(uuid4()) # NOTE: new UUID, because we do not want duplicates
|
|
||||||
har: dict[str, Any] | None = None
|
har: dict[str, Any] | None = None
|
||||||
html: str | None = None
|
html: str | None = None
|
||||||
last_redirected_url: str | None = None
|
last_redirected_url: str | None = None
|
||||||
screenshot: bytes | None = None
|
screenshot: bytes | None = None
|
||||||
if 'har_file' in request.files and request.files['har_file']:
|
if 'har_file' in request.files and request.files['har_file']:
|
||||||
|
uuid = str(uuid4())
|
||||||
har = json.loads(request.files['har_file'].stream.read())
|
har = json.loads(request.files['har_file'].stream.read())
|
||||||
last_redirected_url = request.form.get('landing_page')
|
last_redirected_url = request.form.get('landing_page')
|
||||||
if 'screenshot_file' in request.files:
|
if 'screenshot_file' in request.files:
|
||||||
|
@ -1475,44 +1474,15 @@ def submit_capture() -> str | Response | WerkzeugResponse:
|
||||||
return redirect(url_for('tree', tree_uuid=uuid))
|
return redirect(url_for('tree', tree_uuid=uuid))
|
||||||
elif 'full_capture' in request.files and request.files['full_capture']:
|
elif 'full_capture' in request.files and request.files['full_capture']:
|
||||||
# it *only* accepts a lookyloo export.
|
# it *only* accepts a lookyloo export.
|
||||||
cookies: list[dict[str, str]] | None = None
|
full_capture_file = BytesIO(request.files['full_capture'].stream.read())
|
||||||
has_error = False
|
uuid, messages = lookyloo.unpack_full_capture_archive(full_capture_file, listing)
|
||||||
with ZipFile(BytesIO(request.files['full_capture'].stream.read()), 'r') as lookyloo_capture:
|
if 'errors' in messages and messages['errors']:
|
||||||
potential_favicons = set()
|
for error in messages['errors']:
|
||||||
for filename in lookyloo_capture.namelist():
|
flash(error, 'error')
|
||||||
if filename.endswith('0.har.gz'):
|
else:
|
||||||
# new formal
|
if 'warnings' in messages:
|
||||||
har = json.loads(gzip.decompress(lookyloo_capture.read(filename)))
|
for warning in messages['warnings']:
|
||||||
elif filename.endswith('0.har'):
|
flash(warning, 'warning')
|
||||||
# old format
|
|
||||||
har = json.loads(lookyloo_capture.read(filename))
|
|
||||||
elif filename.endswith('0.html'):
|
|
||||||
html = lookyloo_capture.read(filename).decode()
|
|
||||||
elif filename.endswith('0.last_redirect.txt'):
|
|
||||||
last_redirected_url = lookyloo_capture.read(filename).decode()
|
|
||||||
elif filename.endswith('0.png'):
|
|
||||||
screenshot = lookyloo_capture.read(filename)
|
|
||||||
elif filename.endswith('0.cookies.json'):
|
|
||||||
# Not required
|
|
||||||
cookies = json.loads(lookyloo_capture.read(filename))
|
|
||||||
elif filename.endswith('potential_favicons.ico'):
|
|
||||||
# We may have more than one favicon
|
|
||||||
potential_favicons.add(lookyloo_capture.read(filename))
|
|
||||||
if not har or not html or not last_redirected_url or not screenshot:
|
|
||||||
has_error = True
|
|
||||||
if not har:
|
|
||||||
flash('Invalid submission: missing HAR file', 'error')
|
|
||||||
if not html:
|
|
||||||
flash('Invalid submission: missing HTML file', 'error')
|
|
||||||
if not last_redirected_url:
|
|
||||||
flash('Invalid submission: missing landing page', 'error')
|
|
||||||
if not screenshot:
|
|
||||||
flash('Invalid submission: missing screenshot', 'error')
|
|
||||||
if not has_error:
|
|
||||||
lookyloo.store_capture(uuid, is_public=listing, har=har,
|
|
||||||
last_redirected_url=last_redirected_url,
|
|
||||||
png=screenshot, html=html, cookies=cookies,
|
|
||||||
potential_favicons=potential_favicons)
|
|
||||||
return redirect(url_for('tree', tree_uuid=uuid))
|
return redirect(url_for('tree', tree_uuid=uuid))
|
||||||
else:
|
else:
|
||||||
flash('Invalid submission: please submit at least an HAR file.', 'error')
|
flash('Invalid submission: please submit at least an HAR file.', 'error')
|
||||||
|
|
|
@ -3,16 +3,18 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import gzip
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from uuid import uuid4
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
|
||||||
import flask_login # type: ignore[import-untyped]
|
import flask_login # type: ignore[import-untyped]
|
||||||
from flask import request, send_file, Response
|
from flask import request, send_file, Response
|
||||||
from flask_restx import Namespace, Resource, abort, fields # type: ignore[import-untyped]
|
from flask_restx import Namespace, Resource, fields # type: ignore[import-untyped]
|
||||||
from werkzeug.security import check_password_hash
|
from werkzeug.security import check_password_hash
|
||||||
|
|
||||||
from lacuscore import CaptureStatus as CaptureStatusCore
|
from lacuscore import CaptureStatus as CaptureStatusCore
|
||||||
|
@ -32,7 +34,7 @@ comparator: Comparator = Comparator()
|
||||||
def api_auth_check(method): # type: ignore[no-untyped-def]
|
def api_auth_check(method): # type: ignore[no-untyped-def]
|
||||||
if flask_login.current_user.is_authenticated or load_user_from_request(request):
|
if flask_login.current_user.is_authenticated or load_user_from_request(request):
|
||||||
return method
|
return method
|
||||||
abort(403, 'Authentication required.')
|
return 'Authentication required.', 403
|
||||||
|
|
||||||
|
|
||||||
token_request_fields = api.model('AuthTokenFields', {
|
token_request_fields = api.model('AuthTokenFields', {
|
||||||
|
@ -441,6 +443,55 @@ class CaptureReport(Resource): # type: ignore[misc]
|
||||||
return lookyloo.send_mail(capture_uuid, parameters.get('email', ''), parameters.get('comment'))
|
return lookyloo.send_mail(capture_uuid, parameters.get('email', ''), parameters.get('comment'))
|
||||||
|
|
||||||
|
|
||||||
|
# Upload endpoint: accepts a JSON object containing either `har_file`
# (base64 of a HAR, gzipped or not, optionally with `landing_page`,
# `screenshot_file` and `html_file`) or `full_capture` (base64 of a zipped
# capture export). `listing` is optional and defaults to a falsy value.
# NOTE: `#` comments are used on purpose instead of docstrings, as
# flask-restx uses docstrings to build the generated API documentation.
@api.route('/json/upload')
@api.doc(description='Submits a capture from another instance')
class UploadCapture(Resource):  # type: ignore[misc]
    def post(self) -> dict[str, str | dict[str, list[str]]] | tuple[dict[str, str], int]:
        # Returns {'uuid': ...} (plus 'messages' for a full capture) on
        # success, or ({'error': ...}, 400) when the upload is unusable.
        parameters: dict[str, Any] = request.get_json(force=True)
        if not isinstance(parameters, dict):
            # Valid JSON, but not an object (list, string, ...): nothing to look up.
            return {'error': 'The request body must be a JSON object'}, 400
        # `listing` is optional: a missing key must not turn into a KeyError.
        listing = bool(parameters.get('listing'))

        if parameters.get('har_file'):
            har: dict[str, Any] | None = None
            html: str | None = None
            last_redirected_url: str | None = None
            screenshot: bytes | None = None
            uuid = str(uuid4())
            try:
                har_decoded = base64.b64decode(parameters['har_file'])
                try:
                    # new format
                    har_uncompressed = gzip.decompress(har_decoded)
                except gzip.BadGzipFile:
                    # old format
                    har_uncompressed = har_decoded

                har = json.loads(har_uncompressed)
                last_redirected_url = parameters.get('landing_page')
                if 'screenshot_file' in parameters:
                    screenshot = base64.b64decode(parameters['screenshot_file'])
                if 'html_file' in parameters:
                    html = base64.b64decode(parameters['html_file']).decode()
                lookyloo.store_capture(uuid, is_public=listing, har=har,
                                       last_redirected_url=last_redirected_url,
                                       png=screenshot, html=html)
            except Exception as e:
                return {'error': f'Unable to process the upload: {e}'}, 400
            return {'uuid': uuid}

        if parameters.get('full_capture'):
            try:
                zipped_capture = base64.b64decode(parameters['full_capture'].encode())
            except Exception:
                return {'error': 'Invalid base64-encoding'}, 400
            try:
                uuid, messages = lookyloo.unpack_full_capture_archive(BytesIO(zipped_capture), listing=listing)
            except Exception as e:
                # A payload that is not a usable archive is a client error,
                # reported the same way as in the har_file branch above.
                return {'error': f'Unable to process the upload: {e}'}, 400
            if messages.get('errors'):
                return {'error': ', '.join(messages['errors'])}, 400
            return {'uuid': uuid, 'messages': messages}

        return {'error': 'Full capture or at least har-file is required'}, 400
|
||||||
|
|
||||||
|
|
||||||
auto_report_model = api.model('AutoReportModel', {
|
auto_report_model = api.model('AutoReportModel', {
|
||||||
'email': fields.String(description="Email of the reporter, used by the analyst to get in touch.", example=''),
|
'email': fields.String(description="Email of the reporter, used by the analyst to get in touch.", example=''),
|
||||||
'comment': fields.String(description="Description of the URL, will be given to the analyst.", example='')
|
'comment': fields.String(description="Description of the URL, will be given to the analyst.", example='')
|
||||||
|
@ -475,14 +526,14 @@ class SubmitCapture(Resource): # type: ignore[misc]
|
||||||
@api.param('referer', 'Referer to pass to the capture') # type: ignore[misc]
|
@api.param('referer', 'Referer to pass to the capture') # type: ignore[misc]
|
||||||
@api.param('proxy', 'Proxy to use for the the capture') # type: ignore[misc]
|
@api.param('proxy', 'Proxy to use for the the capture') # type: ignore[misc]
|
||||||
@api.produces(['text/text']) # type: ignore[misc]
|
@api.produces(['text/text']) # type: ignore[misc]
|
||||||
def get(self) -> str | tuple[str, int]:
|
def get(self) -> str | tuple[dict[str, str], int]:
|
||||||
if flask_login.current_user.is_authenticated:
|
if flask_login.current_user.is_authenticated:
|
||||||
user = flask_login.current_user.get_id()
|
user = flask_login.current_user.get_id()
|
||||||
else:
|
else:
|
||||||
user = src_request_ip(request)
|
user = src_request_ip(request)
|
||||||
|
|
||||||
if 'url' not in request.args or not request.args.get('url'):
|
if 'url' not in request.args or not request.args.get('url'):
|
||||||
return 'No "url" in the URL params, nothting to capture.', 400
|
return {'error': 'No "url" in the URL params, nothting to capture.'}, 400
|
||||||
|
|
||||||
to_query: CaptureSettings = {
|
to_query: CaptureSettings = {
|
||||||
'url': request.args['url'],
|
'url': request.args['url'],
|
||||||
|
@ -664,8 +715,7 @@ class RebuildAll(Resource): # type: ignore[misc]
|
||||||
try:
|
try:
|
||||||
lookyloo.rebuild_all()
|
lookyloo.rebuild_all()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {'error': f'Unable to rebuild all captures: {e}.'}, 400
|
return {'error': f'Unable to rebuild all captures: {e}'}, 400
|
||||||
else:
|
|
||||||
return {'info': 'Captures successfully rebuilt.'}
|
return {'info': 'Captures successfully rebuilt.'}
|
||||||
|
|
||||||
|
|
||||||
|
@ -679,8 +729,7 @@ class RebuildAllCache(Resource): # type: ignore[misc]
|
||||||
try:
|
try:
|
||||||
lookyloo.rebuild_cache()
|
lookyloo.rebuild_cache()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {'error': f'Unable to rebuild all the caches: {e}.'}, 400
|
return {'error': f'Unable to rebuild all the caches: {e}'}, 400
|
||||||
else:
|
|
||||||
return {'info': 'All caches successfully rebuilt.'}
|
return {'info': 'All caches successfully rebuilt.'}
|
||||||
|
|
||||||
|
|
||||||
|
@ -696,8 +745,7 @@ class CaptureRebuildTree(Resource): # type: ignore[misc]
|
||||||
lookyloo.remove_pickle(capture_uuid)
|
lookyloo.remove_pickle(capture_uuid)
|
||||||
lookyloo.get_crawled_tree(capture_uuid)
|
lookyloo.get_crawled_tree(capture_uuid)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {'error': f'Unable to rebuild tree: {e}.'}, 400
|
return {'error': f'Unable to rebuild tree: {e}'}, 400
|
||||||
else:
|
|
||||||
return {'info': f'Tree {capture_uuid} successfully rebuilt.'}
|
return {'info': f'Tree {capture_uuid} successfully rebuilt.'}
|
||||||
|
|
||||||
|
|
||||||
|
@ -712,6 +760,5 @@ class CaptureHide(Resource): # type: ignore[misc]
|
||||||
try:
|
try:
|
||||||
lookyloo.hide_capture(capture_uuid)
|
lookyloo.hide_capture(capture_uuid)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {'error': f'Unable to hide the tree: {e}.'}, 400
|
return {'error': f'Unable to hide the tree: {e}'}, 400
|
||||||
else:
|
|
||||||
return {'info': f'Capture {capture_uuid} successfully hidden.'}
|
return {'info': f'Capture {capture_uuid} successfully hidden.'}
|
||||||
|
|
Loading…
Reference in New Issue