new: Add url to do a full export, and get the page

pull/79/head
Raphaël Vinot 2020-05-12 16:53:10 +02:00
parent 6781d5f02e
commit cd972076b5
2 changed files with 45 additions and 4 deletions

View File

@ -15,6 +15,7 @@ import socket
from typing import Union, Dict, List, Tuple, Optional, Any from typing import Union, Dict, List, Tuple, Optional, Any
from urllib.parse import urlsplit from urllib.parse import urlsplit
from uuid import uuid4 from uuid import uuid4
from zipfile import ZipFile
from defang import refang # type: ignore from defang import refang # type: ignore
from har2tree import CrawledTree, Har2TreeError, HarFile from har2tree import CrawledTree, Har2TreeError, HarFile
@ -279,9 +280,29 @@ class Lookyloo():
except Har2TreeError as e: except Har2TreeError as e:
raise NoValidHarFile(e.message) raise NoValidHarFile(e.message)
def _get_raw(self, capture_dir: Path, extension: str='*', all_files: bool=True) -> BytesIO:
    """Return file(s) from a capture directory as an in-memory buffer.

    :param capture_dir: directory searched (non-recursively) for files.
    :param extension: extension used in the glob pattern ``*.<extension>``;
        the default ``'*'`` matches every name that contains a dot.
    :param all_files: when true, every match is packed into a zip archive;
        when false, only the first match (in sorted order) is returned raw.
    :return: a ``BytesIO`` positioned at offset 0. In single-file mode the
        buffer is empty when nothing matches the pattern.
    """
    all_paths = sorted(capture_dir.glob(f'*.{extension}'))
    if not all_files:
        # Only get the first one in the list
        if not all_paths:
            # Nothing matched: hand back an empty buffer instead of failing
            # with an IndexError on all_paths[0].
            return BytesIO()
        with open(all_paths[0], 'rb') as f:
            return BytesIO(f.read())
    to_return = BytesIO()
    with ZipFile(to_return, 'w') as myzip:
        for path in all_paths:
            # Files whose name ends with 'pickle' are left out of the archive.
            if path.name.endswith('pickle'):
                continue
            # Entries are stored under a top-level folder named after the
            # capture directory.
            myzip.write(path, arcname=f'{capture_dir.name}/{path.name}')
    # Rewind so the caller reads the archive from its first byte.
    to_return.seek(0)
    return to_return
def get_html(self, capture_dir: Path, all_html: bool=False) -> BytesIO:
    """Fetch the ``*.html`` file(s) stored in ``capture_dir``.

    Delegates to ``_get_raw`` with the ``html`` extension; ``all_html`` is
    forwarded as its ``all_files`` flag.
    """
    return self._get_raw(capture_dir, extension='html', all_files=all_html)
def get_screenshot(self, capture_dir: Path, all_images: bool=False) -> BytesIO:
    """Fetch the ``*.png`` file(s) stored in ``capture_dir``.

    Delegates to ``_get_raw`` with the ``png`` extension; ``all_images`` is
    forwarded as its ``all_files`` flag.
    """
    return self._get_raw(capture_dir, extension='png', all_files=all_images)
def get_capture(self, capture_dir: Path) -> BytesIO:
    """Fetch the content of ``capture_dir`` through ``_get_raw``.

    ``_get_raw`` is called with its default arguments, so no extension
    filter or single-file restriction is requested here.
    """
    full_export = self._get_raw(capture_dir)
    return full_export
def sane_js_query(self, sha512: str) -> Dict: def sane_js_query(self, sha512: str) -> Dict:
if self.use_sane_js: if self.use_sane_js:

View File

@ -209,11 +209,31 @@ def image(tree_uuid):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid) capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
if not capture_dir: if not capture_dir:
return Response('Not available.', mimetype='text/text') return Response('Not available.', mimetype='text/text')
to_return = lookyloo.load_image(capture_dir) to_return = lookyloo.get_screenshot(capture_dir)
return send_file(to_return, mimetype='image/png', return send_file(to_return, mimetype='image/png',
as_attachment=True, attachment_filename='image.png') as_attachment=True, attachment_filename='image.png')
@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
def html(tree_uuid):
    """Send the HTML of the capture identified by ``tree_uuid`` as a download.

    Answers with a short 'Not available.' text response when the capture
    directory lookup yields nothing.
    """
    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
    if capture_dir:
        page = lookyloo.get_html(capture_dir)
        # Delivered as an attachment named page.html.
        return send_file(page, mimetype='text/html',
                         as_attachment=True, attachment_filename='page.html')
    return Response('Not available.', mimetype='text/text')
@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
def export(tree_uuid):
    """Send the capture identified by ``tree_uuid`` as a zip download.

    Answers with a short 'Not available.' text response when the capture
    directory lookup yields nothing.
    """
    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
    if capture_dir:
        archive = lookyloo.get_capture(capture_dir)
        # Delivered as an attachment named capture.zip.
        return send_file(archive, mimetype='application/zip',
                         as_attachment=True, attachment_filename='capture.zip')
    return Response('Not available.', mimetype='text/text')
@app.route('/redirects/<string:tree_uuid>', methods=['GET']) @app.route('/redirects/<string:tree_uuid>', methods=['GET'])
def redirects(tree_uuid): def redirects(tree_uuid):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid) capture_dir = lookyloo.lookup_capture_dir(tree_uuid)