diff --git a/bin/async_capture.py b/bin/async_capture.py
index 2d5b6095..19456c4a 100755
--- a/bin/async_capture.py
+++ b/bin/async_capture.py
@@ -91,66 +91,20 @@ class AsyncCapture(AbstractManager):
         # By default, the captures are not on the index, unless the user mark them as listed
         listing = True if ('listing' in to_capture and to_capture['listing'].lower() in ['true', '1']) else False
 
-        now = datetime.now()
-        dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
-        safe_create_dir(dirpath)
-
-        if 'os' in to_capture or 'browser' in to_capture:
-            meta: Dict[str, str] = {}
-            if 'os' in to_capture:
-                meta['os'] = to_capture['os']
-            if 'browser' in to_capture:
-                meta['browser'] = to_capture['browser']
-            with (dirpath / 'meta').open('w') as _meta:
-                json.dump(meta, _meta)
-
-        # Write UUID
-        with (dirpath / 'uuid').open('w') as _uuid:
-            _uuid.write(uuid)
-
-        # Write no_index marker (optional)
-        if not listing:
-            (dirpath / 'no_index').touch()
-
-        # Write parent UUID (optional)
-        if 'parent' in to_capture:
-            with (dirpath / 'parent').open('w') as _parent:
-                _parent.write(to_capture['parent'])
-
-        if 'downloaded_filename' in entries and entries['downloaded_filename']:
-            with (dirpath / '0.data.filename').open('w') as _downloaded_filename:
-                _downloaded_filename.write(entries['downloaded_filename'])
-
-        if 'downloaded_file' in entries and entries['downloaded_file']:
-            with (dirpath / '0.data').open('wb') as _downloaded_file:
-                _downloaded_file.write(entries['downloaded_file'])
-
-        if 'error' in entries:
-            with (dirpath / 'error.txt').open('w') as _error:
-                json.dump(entries['error'], _error)
-
-        if 'har' in entries:
-            with (dirpath / '0.har').open('w') as _har:
-                json.dump(entries['har'], _har)
-
-        if 'png' in entries and entries['png']:
-            with (dirpath / '0.png').open('wb') as _img:
-                _img.write(entries['png'])
-
-        if 'html' in entries and entries['html']:
-            with (dirpath / '0.html').open('w') as _html:
-                _html.write(entries['html'])
-
-        if 'last_redirected_url' in entries and entries['last_redirected_url']:
-            with (dirpath / '0.last_redirect.txt').open('w') as _redir:
-                _redir.write(entries['last_redirected_url'])
-
-        if 'cookies' in entries and entries['cookies']:
-            with (dirpath / '0.cookies.json').open('w') as _cookies:
-                json.dump(entries['cookies'], _cookies)
+        # Write the capture on disk; store_capture also registers the directory in the 'lookup_dirs' redis hash.
+        self.lookyloo.store_capture(
+            uuid, listing,
+            os=to_capture.get('os'), browser=to_capture.get('browser'),
+            parent=to_capture.get('parent'),
+            downloaded_filename=entries.get('downloaded_filename'),
+            downloaded_file=entries.get('downloaded_file'),
+            error=entries.get('error'), har=entries.get('har'),
+            png=entries.get('png'), html=entries.get('html'),
+            last_redirected_url=entries.get('last_redirected_url'),
+            cookies=entries.get('cookies')  # type: ignore
+        )
 
         lazy_cleanup = self.lookyloo.redis.pipeline()
-        lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath))
         if queue and self.lookyloo.redis.zscore('queues', queue):
             lazy_cleanup.zincrby('queues', -1, queue)
         lazy_cleanup.zrem('to_capture', uuid)
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index 1a3545ad..0098bb18 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -37,7 +37,7 @@ from redis.connection import UnixDomainSocketConnection
 
 from .capturecache import CaptureCache, CapturesIndex
 from .context import Context
-from .default import LookylooException, get_homedir, get_config, get_socket_path
+from .default import LookylooException, get_homedir, get_config, get_socket_path, safe_create_dir
 from .exceptions import (MissingCaptureDirectory, MissingUUID, TreeNeedsRebuild,
                          NoValidHarFile)
 from .helpers import (get_captures_dir, get_email_template,
@@ -1182,3 +1182,92 @@ class Lookyloo():
                 year_stats['yearly_redirects'] += month_stats['redirects']
             statistics['years'].append(year_stats)
         return statistics
+
+    def store_capture(self, uuid: str, is_public: bool,
+                      os: Optional[str]=None, browser: Optional[str]=None,
+                      parent: Optional[str]=None,
+                      downloaded_filename: Optional[str]=None, downloaded_file: Optional[bytes]=None,
+                      error: Optional[str]=None, har: Optional[Dict[str, Any]]=None,
+                      png: Optional[bytes]=None, html: Optional[str]=None,
+                      last_redirected_url: Optional[str]=None,
+                      cookies: Optional[List[Dict[str, str]]]=None
+                      ) -> None:
+        '''Write the elements of a capture to a new directory and register it.
+
+        The directory is created as <capture_dir>/<year>/<month>/<ISO timestamp>
+        and its path is stored under `uuid` in the `lookup_dirs` redis hash.
+        Every optional element that is None or empty is skipped (no file written).
+
+        :param uuid: UUID of the capture, written to the `uuid` file.
+        :param is_public: If False, a `no_index` marker file is created.
+        :param os: Stored in the `meta` file (JSON), along with `browser`.
+        :param browser: Stored in the `meta` file (JSON), along with `os`.
+        :param parent: UUID of the parent capture, written to the `parent` file.
+        :param downloaded_filename: Written to `0.data.filename`.
+        :param downloaded_file: Raw bytes written to `0.data`.
+        :param error: Dumped as JSON to `error.txt`.
+        :param har: Dumped as JSON to `0.har`.
+        :param png: Raw bytes written to `0.png`.
+        :param html: Written to `0.html`.
+        :param last_redirected_url: Written to `0.last_redirect.txt`.
+        :param cookies: Dumped as JSON to `0.cookies.json`.
+        '''
+
+        now = datetime.now()
+        dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
+        safe_create_dir(dirpath)
+
+        if os or browser:
+            meta: Dict[str, str] = {}
+            if os:
+                meta['os'] = os
+            if browser:
+                meta['browser'] = browser
+            with (dirpath / 'meta').open('w') as _meta:
+                json.dump(meta, _meta)
+
+        # Write UUID
+        with (dirpath / 'uuid').open('w') as _uuid:
+            _uuid.write(uuid)
+
+        # Write no_index marker (optional)
+        if not is_public:
+            (dirpath / 'no_index').touch()
+
+        # Write parent UUID (optional)
+        if parent:
+            with (dirpath / 'parent').open('w') as _parent:
+                _parent.write(parent)
+
+        if downloaded_filename:
+            with (dirpath / '0.data.filename').open('w') as _downloaded_filename:
+                _downloaded_filename.write(downloaded_filename)
+
+        if downloaded_file:
+            with (dirpath / '0.data').open('wb') as _downloaded_file:
+                _downloaded_file.write(downloaded_file)
+
+        if error:
+            with (dirpath / 'error.txt').open('w') as _error:
+                json.dump(error, _error)
+
+        if har:
+            with (dirpath / '0.har').open('w') as _har:
+                json.dump(har, _har)
+
+        if png:
+            with (dirpath / '0.png').open('wb') as _img:
+                _img.write(png)
+
+        if html:
+            with (dirpath / '0.html').open('w') as _html:
+                _html.write(html)
+
+        if last_redirected_url:
+            with (dirpath / '0.last_redirect.txt').open('w') as _redir:
+                _redir.write(last_redirected_url)
+
+        if cookies:
+            with (dirpath / '0.cookies.json').open('w') as _cookies:
+                json.dump(cookies, _cookies)
+        self.redis.hset('lookup_dirs', uuid, str(dirpath))
diff --git a/poetry.lock b/poetry.lock
index 4d4a3af3..bc523ed0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -409,7 +409,7 @@ tornado = ["tornado (>=0.2)"]
 
 [[package]]
 name = "har2tree"
-version = "1.16.0"
+version = "1.16.1"
 description = "HTTP Archive (HAR) to ETE Toolkit generator"
 category = "main"
 optional = false
@@ -419,7 +419,7 @@ python-versions = ">=3.8,<3.12"
 beautifulsoup4 = ">=4.11.1,<5.0.0"
 cchardet = ">=2.1.7,<3.0.0"
 ete3 = ">=3.1.2,<4.0.0"
-filetype = ">=1.1.0,<2.0.0"
+filetype = ">=1.2.0,<2.0.0"
 lxml = ">=4.9.1,<5.0.0"
 numpy = [
     {version = "1.23.3", markers = "python_version < \"3.10\""},
@@ -1341,7 +1341,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
 
 [[package]]
 name = "vt-py"
-version = "0.17.2"
+version = "0.17.3"
 description = "The official Python client library for VirusTotal"
 category = "main"
 optional = false
@@ -1435,7 +1435,7 @@ misp = ["python-magic", "pydeep2"]
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.8,<3.12"
-content-hash = "74152635ad2079a51fbc1bad596897c61ca2a05b199059cb96e0d5a3dd8d0928"
+content-hash = "6faef544e363edbfb356b99c5ec94eb36b17c7dbe09cd818528221b2cec4097d"
 
 [metadata.files]
 aiohttp = [
@@ -1832,8 +1832,8 @@ gunicorn = [
     {file = "gunicorn-20.1.0.tar.gz", hash = "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"},
 ]
 har2tree = [
-    {file = "har2tree-1.16.0-py3-none-any.whl", hash = "sha256:f006eb79c4200671193573af22bfded7e3a37c9ffd410b7c1290d6003b0494cb"},
-    {file = "har2tree-1.16.0.tar.gz", hash = "sha256:6e52299b20b94ea9afe3d687524551a416c60cb9cf90ddfed31b2ad4f13ec6b0"},
+    {file = "har2tree-1.16.1-py3-none-any.whl", hash = "sha256:b970fa0de4f6cc9fe4235c433563727a0c9d692cfd441b77fffdb8f28eb69481"},
+    {file = "har2tree-1.16.1.tar.gz", hash = "sha256:17543d06e90020e96c6bd1ce3bcc37870413ba3d8ee9a6bbad44bec7b8556959"},
 ]
 hiredis = [
     {file = "hiredis-2.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b4c8b0bc5841e578d5fb32a16e0c305359b987b850a06964bd5a62739d688048"},
@@ -2592,8 +2592,8 @@ urllib3 = [
     {file = "urllib3-1.26.12.tar.gz", hash = "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e"},
 ]
 vt-py = [
-    {file = "vt-py-0.17.2.tar.gz", hash = "sha256:ef8b7da02771111bdbb32d7f02db6e14ed81a17625cdfc0337847ed6842396f1"},
-    {file = "vt_py-0.17.2-py3-none-any.whl", hash = "sha256:b154fb2130f88fb2fd46a1f1472739b3eb028886fc3254bacbe65c9225e66ab5"},
+    {file = "vt-py-0.17.3.tar.gz", hash = "sha256:2f96fe86c7213dda9e45ab06bf18f7843f9513c1a073b1606fe238ea624a5b32"},
+    {file = "vt_py-0.17.3-py3-none-any.whl", hash = "sha256:c6cb4e134dcf12683de97993ef7e1daedd7e548acfdc5dc6b730db92c3207610"},
 ]
 w3lib = [
     {file = "w3lib-2.0.1-py3-none-any.whl", hash = "sha256:c5d966f86ae3fb546854478c769250c3ccb7581515b3221bcd2f864440000188"},
diff --git a/pyproject.toml b/pyproject.toml
index 971f51c9..1da3382f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,7 @@ redis = {version = "^4.3.4", extras = ["hiredis"]}
 beautifulsoup4 = "^4.11.1"
 bootstrap-flask = "^2.1.0"
 defang = "^0.5.3"
-vt-py = "^0.17.2"
+vt-py = "^0.17.3"
 pyeupi = "^1.1"
 pysanejs = "^2.0.1"
 pylookyloo = "^1.16.0"
@@ -62,7 +62,7 @@ pyhashlookup = "^1.2.1"
 lief = "^0.12.3"
 ua-parser = "^0.16.1"
 Flask-Login = "^0.6.2"
-har2tree = "^1.16.0"
+har2tree = "^1.16.1"
 passivetotal = "^2.5.9"
 werkzeug = "^2.2.2"
 filetype = "^1.2.0"
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 659f489c..3a46b6b7 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -14,6 +14,7 @@ from importlib.metadata import version
 from io import BytesIO, StringIO
 from typing import Any, Dict, List, Optional, Union, TypedDict
 from urllib.parse import quote_plus, unquote_plus, urlparse
+from uuid import uuid4
 
 import flask_login  # type: ignore
 from flask import (Flask, Response, flash, jsonify, redirect, render_template,
@@ -864,6 +865,33 @@ def recapture(tree_uuid: str):
     return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
 
 
+# ################## Submit existing capture ##################
+
+@app.route('/submit_capture', methods=['GET', 'POST'])
+def submit_capture():
+
+    if request.method == 'POST':
+        if 'har_file' not in request.files:
+            flash('Invalid submission: please submit at least an HAR file.', 'error')
+        else:
+            try:
+                har = json.loads(request.files['har_file'].stream.read())
+            except ValueError:
+                # json.JSONDecodeError and UnicodeDecodeError are both ValueError: empty or non-JSON upload.
+                flash('Invalid submission: the file is not a valid HAR (JSON) file.', 'error')
+            else:
+                uuid = str(uuid4())
+                listing = True if request.form.get('listing') else False
+                lookyloo.store_capture(uuid, is_public=listing, har=har)
+                return redirect(url_for('tree', tree_uuid=uuid))
+
+    return render_template('submit_capture.html',
+                           default_public=get_config('generic', 'default_public'),
+                           public_domain=lookyloo.public_domain)
+
+
+# #############################################################
+
 @app.route('/capture', methods=['GET', 'POST'])
 def capture_web():
     if flask_login.current_user.is_authenticated:
diff --git a/website/web/templates/submit_capture.html b/website/web/templates/submit_capture.html
new file mode 100644
index 00000000..c2c152da
--- /dev/null
+++ b/website/web/templates/submit_capture.html
@@ -0,0 +1,73 @@
+{% extends "main.html" %}
+{% from 'bootstrap5/utils.html' import render_messages %}
+{% block title %}Submit an existing capture{% endblock %}
+
+{% block card %}
+
+
+
+
+
+
+{% endblock %}
+
+{% block content %}
+
+
+ + Lookyloo + +
+ {{ render_messages(container=True, dismissible=True) }} +
+
+
+
+ + +
+
+
+
+ +
+ +
[Experimental] It can be any file in HTTP Archive format, from any source (browser or any other tool)
+
+ This feature is experimental and it may not work for some reason. If it is the case, please + open an issue on GitHub and attach the HAR file so we can investigate. +
+
+
+ + + +
+ + {% if default_public %} + By default, the capture is public. If you do not want that, untick the box at the top of the form. + {% else %} + By default, the capture is private (not visible on the index page). If you want it to be public tick the box at the top of the form. + {% endif %} + +
+
+ +
+
+
+{% endblock %} + +{% block scripts %} + {{ super() }} +{% endblock %}