new: Keep capture even if we have a network error

pull/406/head
Raphaël Vinot 2022-05-03 12:23:16 +02:00
parent bd62d62392
commit d222ae04aa
4 changed files with 29 additions and 20 deletions

View File

@ -186,24 +186,26 @@ class AsyncCapture(AbstractManager):
if 'error' in entries:
with (dirpath / 'error.txt').open('w') as _error:
json.dump(entries['error'], _error)
if 'har' not in entries:
return False, entries['error'] if entries['error'] else "Unknown error"
# The capture went fine
harfile = entries['har']
png = entries['png']
html = entries['html']
last_redirect = entries['last_redirected_url']
with (dirpath / '0.har').open('w') as _har:
json.dump(harfile, _har)
with (dirpath / '0.png').open('wb') as _img:
_img.write(png)
with (dirpath / '0.html').open('w') as _html:
_html.write(html)
with (dirpath / '0.last_redirect.txt').open('w') as _redir:
_redir.write(last_redirect)
json.dump(entries['har'], _har)
if 'cookies' in entries:
if 'png' in entries and entries['png']:
with (dirpath / '0.png').open('wb') as _img:
_img.write(entries['png'])
if 'html' in entries and entries['html']:
with (dirpath / '0.html').open('w') as _html:
_html.write(entries['html'])
if 'last_redirected_url' in entries and entries['last_redirected_url']:
with (dirpath / '0.last_redirect.txt').open('w') as _redir:
_redir.write(entries['last_redirected_url'])
if 'cookies' in entries and entries['cookies']:
with (dirpath / '0.cookies.json').open('w') as _cookies:
json.dump(entries['cookies'], _cookies)
self.redis.hset('lookup_dirs', perma_uuid, str(dirpath))

View File

@ -19,7 +19,7 @@ from zipfile import ZipFile
from defang import defang # type: ignore
from har2tree import CrawledTree, HostNode, URLNode
from PIL import Image # type: ignore
from PIL import Image, UnidentifiedImageError # type: ignore
from pymisp import MISPAttribute, MISPEvent, MISPObject
from redis import ConnectionPool, Redis
from redis.connection import UnixDomainSocketConnection
@ -447,6 +447,8 @@ class Lookyloo():
all_paths = sorted(list(capture_dir.glob(f'*.{extension}')))
if not all_files:
# Only get the first one in the list
if not all_paths:
return BytesIO()
with open(all_paths[0], 'rb') as f:
return BytesIO(f.read())
to_return = BytesIO()
@ -485,6 +487,11 @@ class Lookyloo():
self.logger.warning(f'Unable to generate the screenshot thumbnail of {capture_uuid}: image too big ({e}).')
error_img: Path = get_homedir() / 'website' / 'web' / 'static' / 'error_screenshot.png'
to_thumbnail = Image.open(error_img)
except UnidentifiedImageError as e:
# Pillow could not open or identify the image (e.g. an empty or corrupt screenshot, not an oversized one): https://pillow.readthedocs.io/en/stable/reference/Image.html
self.logger.warning(f'Unable to generate the screenshot thumbnail of {capture_uuid}: image too big ({e}).')
error_img = get_homedir() / 'website' / 'web' / 'static' / 'error_screenshot.png'
to_thumbnail = Image.open(error_img)
to_thumbnail.thumbnail(size)
to_thumbnail.save(to_return, 'png')

8
poetry.lock generated
View File

@ -692,7 +692,7 @@ websockets = "10.1"
[[package]]
name = "playwrightcapture"
version = "0.1.10"
version = "0.1.11"
description = "A simple library to capture websites using playwright"
category = "main"
optional = false
@ -1317,7 +1317,7 @@ misp = ["python-magic", "pydeep"]
[metadata]
lock-version = "1.1"
python-versions = ">=3.8,<3.11"
content-hash = "35289419d0dcc9a3f24c31684c49ebbf55062b255c2438266084dacd0bd2c9d9"
content-hash = "ce6a1d1db17e7d4b3812db4e53892cea0e73def216fa14bbfa61bc2abd8556ee"
[metadata.files]
aiohttp = [
@ -2063,8 +2063,8 @@ playwright = [
{file = "playwright-1.21.0-py3-none-win_amd64.whl", hash = "sha256:0cdd82d4d2ce176b596e960825a4be7b03b7637e9cb243e634e896d787160535"},
]
playwrightcapture = [
{file = "PlaywrightCapture-0.1.10-py3-none-any.whl", hash = "sha256:31b9c6a530f60e6d7fcc08645af6779685ed12b65b38bd033a7d3a20a13e441e"},
{file = "PlaywrightCapture-0.1.10.tar.gz", hash = "sha256:5b5b2ad906cca85573c760cfb6c931b1aed3d4cc2248edb5c8d380d1349f23ae"},
{file = "PlaywrightCapture-0.1.11-py3-none-any.whl", hash = "sha256:31c688a3e7cf8999fb1c0394b1fda41f09d428a5dc246f68e342b63c64bf0317"},
{file = "PlaywrightCapture-0.1.11.tar.gz", hash = "sha256:a6bc1c78daeca33817df4be6232a8a0e58be7225e2c286120776f0572023e4d4"},
]
prompt-toolkit = [
{file = "prompt_toolkit-3.0.29-py3-none-any.whl", hash = "sha256:62291dad495e665fca0bda814e342c69952086afb0f4094d0893d357e5c78752"},

View File

@ -65,7 +65,7 @@ lief = "^0.12.1"
ua-parser = "^0.10.0"
Flask-Login = "^0.6.1"
har2tree = "^1.11.4"
playwrightcapture = "^0.1.10"
playwrightcapture = "^0.1.11"
[tool.poetry.extras]
misp = ['python-magic', 'pydeep']