From 391166aec7656929ba728e34f464850ed3fa56ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 21 Feb 2024 14:36:19 +0100 Subject: [PATCH] new: Many improvements when correlating resources --- lookyloo/indexing.py | 8 +-- lookyloo/lookyloo.py | 12 ++-- website/web/__init__.py | 7 +++ website/web/templates/body_hash.html | 64 ++++++++++++++------- website/web/templates/favicon_details.html | 20 ++++--- website/web/templates/tree.html | 2 +- website/web/templates/tree_body_hashes.html | 6 +- website/web/templates/tree_favicons.html | 24 +++++--- 8 files changed, 94 insertions(+), 49 deletions(-) diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py index 252915a..8f7d83e 100644 --- a/lookyloo/indexing.py +++ b/lookyloo/indexing.py @@ -201,7 +201,7 @@ class Indexing(): def get_body_hash_captures(self, body_hash: str, filter_url: str | None=None, filter_capture_uuid: str | None=None, limit: int=20, - prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool]]]: + prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool, str]]]: '''Get the captures matching the hash. :param filter_url: URL of the hash we're searching for @@ -209,7 +209,7 @@ class Indexing(): :param limit: Max matching captures to return, -1 means unlimited. :param prefered_uuids: UUID cached right now, so we don't rebuild trees. 
''' - to_return: list[tuple[str, str, str, bool]] = [] + to_return: list[tuple[str, str, str, bool, str]] = [] len_captures = self.redis.scard(f'bh|{body_hash}|captures') unlimited = False if limit == -1: @@ -227,9 +227,9 @@ class Indexing(): url_uuid, hostnode_uuid, url = entry.split('|', 2) hostname: str = urlsplit(url).hostname if filter_url: - to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url)) + to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url, url)) else: - to_return.append((capture_uuid, hostnode_uuid, hostname, False)) + to_return.append((capture_uuid, hostnode_uuid, hostname, False, url)) if not unlimited and limit <= 0: break return len_captures, to_return diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 0946a32..da6581b 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -927,11 +927,15 @@ class Lookyloo(): return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page) - set(ct.root_hartree.all_url_requests.keys())) - def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float]]]: + def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]: '''Returns all the captures related to a hash (sha512), used in the web interface.''' total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1) - cached_captures = self.sorted_capture_cache([d[0] for d in details]) - captures = [(cache.uuid, cache.title) for cache in cached_captures] + captures = [] + for capture_uuid, hostnode_uuid, hostname, _, url in details: + cache = self.capture_cache(capture_uuid) + if not cache: + continue + captures.append((cache.uuid, cache.title, cache.timestamp, hostnode_uuid, url)) domains = self.indexing.get_body_hash_domains(body_hash) return captures, domains @@ -1079,7 +1083,7 @@ class Lookyloo(): captures_list: dict[str, list[tuple[str, str, str, str, 
str]]] = {'same_url': [], 'different_url': []} total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=set(self._captures_index.keys())) - for h_capture_uuid, url_uuid, url_hostname, same_url in details: + for h_capture_uuid, url_uuid, url_hostname, same_url, url in details: cache = self.capture_cache(h_capture_uuid) if cache and hasattr(cache, 'title'): if same_url: diff --git a/website/web/__init__.py b/website/web/__init__.py index 7df0401..aa0c485 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -1052,6 +1052,13 @@ def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse: return _prepare_capture_template(user_ua=request.headers.get('User-Agent')) +@app.route('/ressource_by_hash/', methods=['GET']) +@file_response # type: ignore[misc] +def ressource_by_hash(sha512: str) -> Response: + details, body = lookyloo.get_body_hash_full(sha512) + return send_file(body, as_attachment=True, download_name='ressource.bin') + + # ################## Submit existing capture ################## @app.route('/submit_capture', methods=['GET', 'POST']) diff --git a/website/web/templates/body_hash.html b/website/web/templates/body_hash.html index 94a8550..f5b82d0 100644 --- a/website/web/templates/body_hash.html +++ b/website/web/templates/body_hash.html @@ -1,3 +1,5 @@ +{% from "macros.html" import shorten_string %} + {% if from_popup %} {% extends "main.html" %} @@ -23,50 +25,70 @@ {% endblock %} {%endif%} + {% block content %} {% if from_popup %} {%endif%} +
+
{{ body_hash }}
+ Download +
+ -
-

{{ body_hash }}

-
- + +
- + {% for domain, freq in domains %} - + {% endfor %}
Hostname FrequencyHostname
- {{ domain }} - {{ freq }}{{ domain }}
-

The same file was seen in these captures:

- + + + + + + + + + + {% for capture_uuid, title, timestamp, hostnode_uuid, url in captures %} + + + + + + {% endfor %} + +
TimestampTitleURL
{{ timestamp }} + {% if from_popup %} + {{ title }} + {% else %} + {{ title }} + {% endif %} + {{ url }}
{% endblock %} diff --git a/website/web/templates/favicon_details.html b/website/web/templates/favicon_details.html index 4622198..382a89e 100644 --- a/website/web/templates/favicon_details.html +++ b/website/web/templates/favicon_details.html @@ -1,8 +1,10 @@ +{% from "macros.html" import shorten_string %} + @@ -12,24 +14,26 @@ + - {% for capture_uuid, title, landing_page, capture_time in captures %} + - {% endfor %} diff --git a/website/web/templates/tree.html b/website/web/templates/tree.html index 955ee82..cb37d93 100644 --- a/website/web/templates/tree.html +++ b/website/web/templates/tree.html @@ -538,7 +538,7 @@
Capture Time Capture Title Landing pageCapture Time
+ {{capture_time}} + {{ title }} - {{landing_page}} - - {{capture_time}} + + {{ shorten_string(landing_page, 300) }} +
- - + + @@ -20,6 +20,7 @@ {% for body_hash, info in body_hashes.items() %} {% set icon_info = get_icon(info['node'].generic_type) %} + -
File type Captures totalRessourceFile typeRessource URL in capture Hash (sha512)
{{ info['total_captures'] }} {{ icon_info['tooltip'] }} {{ info['total_captures'] }}

{{ info['node'].name }}

diff --git a/website/web/templates/tree_favicons.html b/website/web/templates/tree_favicons.html index 8482aca..c16e361 100644 --- a/website/web/templates/tree_favicons.html +++ b/website/web/templates/tree_favicons.html @@ -1,31 +1,39 @@ + +
Click on the favicon to see the other captures it's been found in
- - + + {% for favicon_sha512, freq, number_captures, mimetype, b64_favicon in favicons %} + + - - {% endfor %}
FaviconFrequency Number of capturesFaviconDownload
{{ number_captures }} - + - +
+
+ {{ freq }}{{ number_captures }}