mirror of https://github.com/CIRCL/lookyloo
chg: Better handling of insanely long webpages.
parent
6d2cbc6c2e
commit
4686b560dc
|
@ -113,7 +113,7 @@ class Lookyloo():
|
||||||
with self_generated_ua_file.open('w') as f:
|
with self_generated_ua_file.open('w') as f:
|
||||||
json.dump(to_store, f, indent=2)
|
json.dump(to_store, f, indent=2)
|
||||||
|
|
||||||
def _cache_capture(self, capture_uuid: str) -> None:
|
def _cache_capture(self, capture_uuid: str) -> CrawledTree:
|
||||||
'''Generate the pickle, add capture in the indexes'''
|
'''Generate the pickle, add capture in the indexes'''
|
||||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||||
|
|
||||||
|
@ -142,6 +142,7 @@ class Lookyloo():
|
||||||
|
|
||||||
with (capture_dir / 'tree.pickle').open('wb') as _p:
|
with (capture_dir / 'tree.pickle').open('wb') as _p:
|
||||||
pickle.dump(ct, _p)
|
pickle.dump(ct, _p)
|
||||||
|
return ct
|
||||||
|
|
||||||
def _build_cname_chain(self, known_cnames: Dict[str, Optional[str]], hostname) -> List[str]:
|
def _build_cname_chain(self, known_cnames: Dict[str, Optional[str]], hostname) -> List[str]:
|
||||||
'''Returns a list of CNAMEs starting from one hostname.
|
'''Returns a list of CNAMEs starting from one hostname.
|
||||||
|
@ -211,8 +212,7 @@ class Lookyloo():
|
||||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||||
ct = load_pickle_tree(capture_dir)
|
ct = load_pickle_tree(capture_dir)
|
||||||
if not ct:
|
if not ct:
|
||||||
self._cache_capture(capture_uuid)
|
ct = self._cache_capture(capture_uuid)
|
||||||
ct = load_pickle_tree(capture_dir)
|
|
||||||
if not ct:
|
if not ct:
|
||||||
raise NoValidHarFile(f'Unable to get tree from {capture_dir}')
|
raise NoValidHarFile(f'Unable to get tree from {capture_dir}')
|
||||||
return ct
|
return ct
|
||||||
|
@ -650,18 +650,25 @@ class Lookyloo():
|
||||||
'''Get the cookie(s)'''
|
'''Get the cookie(s)'''
|
||||||
return self._get_raw(capture_uuid, 'cookies.json', all_cookies)
|
return self._get_raw(capture_uuid, 'cookies.json', all_cookies)
|
||||||
|
|
||||||
def get_screenshot(self, capture_uuid: str, all_images: bool=False) -> BytesIO:
|
def get_screenshot(self, capture_uuid: str) -> BytesIO:
|
||||||
'''Get the screenshot(s) of the rendered page'''
|
'''Get the screenshot(s) of the rendered page'''
|
||||||
return self._get_raw(capture_uuid, 'png', all_images)
|
return self._get_raw(capture_uuid, 'png', all_files=False)
|
||||||
|
|
||||||
def get_screenshot_thumbnail(self, capture_uuid: str, all_images: bool=False, for_datauri=False) -> Union[str, BytesIO]:
|
def get_screenshot_thumbnail(self, capture_uuid: str, for_datauri=False) -> Union[str, BytesIO]:
|
||||||
'''Get the thumbnail of the rendered page'''
|
'''Get the thumbnail of the rendered page'''
|
||||||
|
to_return = BytesIO()
|
||||||
size = 64, 64
|
size = 64, 64
|
||||||
screenshot = Image.open(self._get_raw(capture_uuid, 'png', all_images))
|
try:
|
||||||
|
screenshot = self.get_screenshot(capture_uuid)
|
||||||
|
with Image.open(screenshot) as screenshot:
|
||||||
c_screenshot = screenshot.crop((0, 0, screenshot.width, screenshot.width))
|
c_screenshot = screenshot.crop((0, 0, screenshot.width, screenshot.width))
|
||||||
c_screenshot.thumbnail(size)
|
c_screenshot.thumbnail(size)
|
||||||
to_return = BytesIO()
|
|
||||||
c_screenshot.save(to_return, 'png')
|
c_screenshot.save(to_return, 'png')
|
||||||
|
except Image.DecompressionBombError as e:
|
||||||
|
# The image is most probably too big: https://pillow.readthedocs.io/en/stable/reference/Image.html
|
||||||
|
self.logger.warning(f'Unable to generate the screenshot thumbnail of {capture_uuid}: image too big ({e}).')
|
||||||
|
# TODO: Default image
|
||||||
|
|
||||||
if for_datauri:
|
if for_datauri:
|
||||||
return base64.b64encode(to_return.getvalue()).decode()
|
return base64.b64encode(to_return.getvalue()).decode()
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -492,7 +492,7 @@ function update(root, computed_node_width=0) {
|
||||||
.attr('id', 'screenshot_thumbnail')
|
.attr('id', 'screenshot_thumbnail')
|
||||||
.attr("width", thumbnail_size)
|
.attr("width", thumbnail_size)
|
||||||
.attr("height", thumbnail_size)
|
.attr("height", thumbnail_size)
|
||||||
.attr("xlink:href", `data:image/png;base64,${screenshot_thumbnail}`)
|
.attr("xlink:href", screenshot_thumbnail ? `data:image/png;base64,${screenshot_thumbnail}` : '/static/error_screenshot.png')
|
||||||
.attr('cursor', 'pointer')
|
.attr('cursor', 'pointer')
|
||||||
.on('mouseover', (event, d) => {
|
.on('mouseover', (event, d) => {
|
||||||
d3.select('#tooltip')
|
d3.select('#tooltip')
|
||||||
|
|
|
@ -301,6 +301,9 @@
|
||||||
</div>
|
</div>
|
||||||
<div class="modal-body">
|
<div class="modal-body">
|
||||||
<center>
|
<center>
|
||||||
|
{% if not b64_thumbnail %}
|
||||||
|
Image too big to display in the browser, <a href="{{ url_for('image', tree_uuid=tree_uuid) }}" role="button">click here to download it</a>.
|
||||||
|
{% else %}
|
||||||
{% if blur_screenshot %}
|
{% if blur_screenshot %}
|
||||||
<button type="button" class="btn btn-info" onclick="$('#screenshot').removeClass('blur')"> Unblur screenshot</button>
|
<button type="button" class="btn btn-info" onclick="$('#screenshot').removeClass('blur')"> Unblur screenshot</button>
|
||||||
</br>
|
</br>
|
||||||
|
@ -308,6 +311,7 @@
|
||||||
</br>
|
</br>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<img src="{{ url_for('image', tree_uuid=tree_uuid) }}" class="img-fluid {{ 'blur' if blur_screenshot else '' }}" id="screenshot"/>
|
<img src="{{ url_for('image', tree_uuid=tree_uuid) }}" class="img-fluid {{ 'blur' if blur_screenshot else '' }}" id="screenshot"/>
|
||||||
|
{% endif %}
|
||||||
</center>
|
</center>
|
||||||
</div>
|
</div>
|
||||||
<div class="modal-footer">
|
<div class="modal-footer">
|
||||||
|
|
Loading…
Reference in New Issue