new: Add url to do a full export, and get the page

pull/79/head
Raphaël Vinot 2020-05-12 16:53:10 +02:00
parent 6781d5f02e
commit cd972076b5
2 changed files with 45 additions and 4 deletions

View File

@ -15,6 +15,7 @@ import socket
from typing import Union, Dict, List, Tuple, Optional, Any
from urllib.parse import urlsplit
from uuid import uuid4
from zipfile import ZipFile
from defang import refang # type: ignore
from har2tree import CrawledTree, Har2TreeError, HarFile
@ -279,9 +280,29 @@ class Lookyloo():
except Har2TreeError as e:
raise NoValidHarFile(e.message)
def load_image(self, capture_dir: Path) -> BytesIO:
    """Return the screenshot of a capture as an in-memory binary stream.

    :param capture_dir: Directory holding the files of one capture.
    :return: ``BytesIO`` with the content of the first ``*.png`` file (in
             sorted path order) found directly inside ``capture_dir``.
    :raises IndexError: If ``capture_dir`` contains no ``*.png`` file.
    """
    # glob() yields entries in arbitrary, filesystem-dependent order; sorting
    # makes sure the same file is picked on every call when several exist.
    screenshots = sorted(capture_dir.glob('*.png'))
    if not screenshots:
        # Same exception type a bare ``[0]`` lookup would raise, but with a
        # message that says what is missing and where.
        raise IndexError(f'No PNG file found in {capture_dir}')
    return BytesIO(screenshots[0].read_bytes())
def _get_raw(self, capture_dir: Path, extension: str='*', all_files: bool=True) -> BytesIO:
    """Return raw file(s) of a capture as an in-memory binary stream.

    Candidates are the entries directly inside ``capture_dir`` whose name
    matches ``*.<extension>``, taken in sorted path order.

    :param capture_dir: Directory holding the files of one capture.
    :param extension: Extension to select, without the leading dot. The
                      default ``'*'`` selects every entry whose name
                      contains a dot.
    :param all_files: If false, return the content of the first match as is.
                      If true, return a zip archive with every match, stored
                      under ``<capture_dir.name>/<file name>``.
    :return: ``BytesIO`` positioned at offset 0.
    :raises IndexError: If ``all_files`` is false and nothing matches.
    """
    all_paths = sorted(capture_dir.glob(f'*.{extension}'))
    if not all_files:
        # Only get the first one in the list
        if not all_paths:
            # Same exception type a bare ``[0]`` lookup would raise, but with
            # a message that says what is missing and where.
            raise IndexError(f'No *.{extension} file found in {capture_dir}')
        return BytesIO(all_paths[0].read_bytes())
    to_return = BytesIO()
    with ZipFile(to_return, 'w') as myzip:
        for path in all_paths:
            if path.name.endswith('pickle'):
                # Pickles never go into the archive.
                # NOTE(review): presumably a local cache that is neither
                # useful nor safe to hand out - confirm.
                continue
            myzip.write(path, arcname=f'{capture_dir.name}/{path.name}')
    # Rewind so the caller reads the archive from its first byte.
    to_return.seek(0)
    return to_return
def get_html(self, capture_dir: Path, all_html: bool=False) -> BytesIO:
    """Return the HTML of a capture.

    Delegates to ``_get_raw`` for the ``html`` extension; ``all_html`` is
    forwarded as its ``all_files`` switch (single file vs. zip of all).
    """
    return self._get_raw(capture_dir, extension='html', all_files=all_html)
def get_screenshot(self, capture_dir: Path, all_images: bool=False) -> BytesIO:
    """Return the screenshot(s) of a capture.

    Delegates to ``_get_raw`` for the ``png`` extension; ``all_images`` is
    forwarded as its ``all_files`` switch (single file vs. zip of all).
    """
    return self._get_raw(capture_dir, extension='png', all_files=all_images)
def get_capture(self, capture_dir: Path) -> BytesIO:
    """Return a full export of a capture.

    Delegates to ``_get_raw`` with its default arguments.
    """
    full_export = self._get_raw(capture_dir)
    return full_export
def sane_js_query(self, sha512: str) -> Dict:
if self.use_sane_js:

View File

@ -209,11 +209,31 @@ def image(tree_uuid):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
if not capture_dir:
return Response('Not available.', mimetype='text/text')
to_return = lookyloo.load_image(capture_dir)
to_return = lookyloo.get_screenshot(capture_dir)
return send_file(to_return, mimetype='image/png',
as_attachment=True, attachment_filename='image.png')
@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
def html(tree_uuid):
    """Serve the HTML of a capture as a ``page.html`` download.

    Answers with a plain 'Not available.' response when the lookup of
    ``tree_uuid`` yields no capture directory.
    """
    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
    if capture_dir:
        return send_file(lookyloo.get_html(capture_dir), mimetype='text/html',
                         as_attachment=True, attachment_filename='page.html')
    # NOTE(review): 'text/text' is not a registered media type; 'text/plain'
    # may be what is meant - kept as is to match the other routes.
    return Response('Not available.', mimetype='text/text')
@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
def export(tree_uuid):
    """Serve the full export of a capture as a ``capture.zip`` download.

    Answers with a plain 'Not available.' response when the lookup of
    ``tree_uuid`` yields no capture directory.
    """
    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
    if capture_dir:
        return send_file(lookyloo.get_capture(capture_dir), mimetype='application/zip',
                         as_attachment=True, attachment_filename='capture.zip')
    # NOTE(review): 'text/text' is not a registered media type; 'text/plain'
    # may be what is meant - kept as is to match the other routes.
    return Response('Not available.', mimetype='text/text')
@app.route('/redirects/<string:tree_uuid>', methods=['GET'])
def redirects(tree_uuid):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)