new: Many improvements when correlating resources

pull/887/head
Raphaël Vinot 2024-02-21 14:36:19 +01:00
parent 104129bbe2
commit 391166aec7
8 changed files with 94 additions and 49 deletions

View File

@ -201,7 +201,7 @@ class Indexing():
def get_body_hash_captures(self, body_hash: str, filter_url: str | None=None, def get_body_hash_captures(self, body_hash: str, filter_url: str | None=None,
filter_capture_uuid: str | None=None, filter_capture_uuid: str | None=None,
limit: int=20, limit: int=20,
prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool]]]: prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool, str]]]:
'''Get the captures matching the hash. '''Get the captures matching the hash.
:param filter_url: URL of the hash we're searching for :param filter_url: URL of the hash we're searching for
@ -209,7 +209,7 @@ class Indexing():
:param limit: Max matching captures to return, -1 means unlimited. :param limit: Max matching captures to return, -1 means unlimited.
:param prefered_uuids: UUID cached right now, so we don't rebuild trees. :param prefered_uuids: UUID cached right now, so we don't rebuild trees.
''' '''
to_return: list[tuple[str, str, str, bool]] = [] to_return: list[tuple[str, str, str, bool, str]] = []
len_captures = self.redis.scard(f'bh|{body_hash}|captures') len_captures = self.redis.scard(f'bh|{body_hash}|captures')
unlimited = False unlimited = False
if limit == -1: if limit == -1:
@ -227,9 +227,9 @@ class Indexing():
url_uuid, hostnode_uuid, url = entry.split('|', 2) url_uuid, hostnode_uuid, url = entry.split('|', 2)
hostname: str = urlsplit(url).hostname hostname: str = urlsplit(url).hostname
if filter_url: if filter_url:
to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url)) to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url, url))
else: else:
to_return.append((capture_uuid, hostnode_uuid, hostname, False)) to_return.append((capture_uuid, hostnode_uuid, hostname, False, url))
if not unlimited and limit <= 0: if not unlimited and limit <= 0:
break break
return len_captures, to_return return len_captures, to_return

View File

@ -927,11 +927,15 @@ class Lookyloo():
return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page) return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page)
- set(ct.root_hartree.all_url_requests.keys())) - set(ct.root_hartree.all_url_requests.keys()))
def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float]]]: def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]:
'''Returns all the captures related to a hash (sha512), used in the web interface.''' '''Returns all the captures related to a hash (sha512), used in the web interface.'''
total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1) total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1)
cached_captures = self.sorted_capture_cache([d[0] for d in details]) captures = []
captures = [(cache.uuid, cache.title) for cache in cached_captures] for capture_uuid, hostnode_uuid, hostname, _, url in details:
cache = self.capture_cache(capture_uuid)
if not cache:
continue
captures.append((cache.uuid, cache.title, cache.timestamp, hostnode_uuid, url))
domains = self.indexing.get_body_hash_domains(body_hash) domains = self.indexing.get_body_hash_domains(body_hash)
return captures, domains return captures, domains
@ -1079,7 +1083,7 @@ class Lookyloo():
captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []} captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
prefered_uuids=set(self._captures_index.keys())) prefered_uuids=set(self._captures_index.keys()))
for h_capture_uuid, url_uuid, url_hostname, same_url in details: for h_capture_uuid, url_uuid, url_hostname, same_url, url in details:
cache = self.capture_cache(h_capture_uuid) cache = self.capture_cache(h_capture_uuid)
if cache and hasattr(cache, 'title'): if cache and hasattr(cache, 'title'):
if same_url: if same_url:

View File

@ -1052,6 +1052,13 @@ def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse:
return _prepare_capture_template(user_ua=request.headers.get('User-Agent')) return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
@app.route('/ressource_by_hash/<string:sha512>', methods=['GET'])
@file_response # type: ignore[misc]
def ressource_by_hash(sha512: str) -> Response:
details, body = lookyloo.get_body_hash_full(sha512)
return send_file(body, as_attachment=True, download_name='ressource.bin')
# ################## Submit existing capture ################## # ################## Submit existing capture ##################
@app.route('/submit_capture', methods=['GET', 'POST']) @app.route('/submit_capture', methods=['GET', 'POST'])

View File

@ -1,3 +1,5 @@
{% from "macros.html" import shorten_string %}
{% if from_popup %} {% if from_popup %}
{% extends "main.html" %} {% extends "main.html" %}
@ -23,50 +25,70 @@
{% endblock %} {% endblock %}
{%endif%} {%endif%}
{% block content %} {% block content %}
{% if from_popup %} {% if from_popup %}
<button onclick="window.history.back();" class="btn btn-primary" type="button">Go Back</button> <button onclick="window.history.back();" class="btn btn-primary" type="button">Go Back</button>
{%endif%} {%endif%}
<center>
<h6>{{ body_hash }}</h6>
<a href="{{ url_for('ressource_by_hash', sha512=body_hash) }}">Download</a>
</center>
<script type="text/javascript"> <script type="text/javascript">
new DataTable('#bodyHashDetailsTable', { new DataTable('#freqHostTable', {
columnDefs: [{ width: '20%', targets: 0 }, columnDefs: [{ width: '20%', targets: 0 },
{ width: '80%', targets: 1 }], { width: '80%', targets: 1 }],
}); });
</script> </script>
<center> <script type="text/javascript">
<h4>{{ body_hash }}</h4> new DataTable('#bodyHashDetailsTable', {
</center> columnDefs: [{ width: '20%', targets: 0 },
<table id="bodyHashDetailsTable" class="table table-striped" style="width:100%"> { width: '40%', targets: 1 },
{ width: '40%', targets: 2 }],
});
</script>
<table id="freqHostTable" class="table table-striped" style="width:100%">
<thead> <thead>
<tr> <tr>
<th>Hostname</th>
<th>Frequency</th> <th>Frequency</th>
<th>Hostname</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for domain, freq in domains %} {% for domain, freq in domains %}
<tr> <tr>
<td>
{{ domain }}
</td>
<td>{{ freq }}</td> <td>{{ freq }}</td>
<td>{{ domain }}</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
</div>
<p>The same file was seen in these captures:</p> <p>The same file was seen in these captures:</p>
<ul> <table id="bodyHashDetailsTable" class="table table-striped" style="width:100%">
{% for capture_uuid, title in captures %} <thead>
<li> <tr>
{% if from_popup %} <th>Timestamp</th>
<a href="#/" onclick="openTreeInNewTab('{{ capture_uuid }}')">{{ title }}</a> <th>Title</th>
{% else %} <th>URL</th>
<a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">{{ title }}</a> </tr>
{% endif %} </thead>
</li> <tbody>
{% endfor %} {% for capture_uuid, title, timestamp, hostnode_uuid, url in captures %}
</ul> <tr>
<td>{{ timestamp }}</td>
<td>
{% if from_popup %}
<a href="#/" onclick="openTreeInNewTab('{{ capture_uuid }}')">{{ title }}</a>
{% else %}
<a href="{{ url_for('tree', tree_uuid=capture_uuid, node_uuid=hostnode_uuid) }}">{{ title }}</a>
{% endif %}
</td>
<td>{{ url }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endblock %} {% endblock %}

View File

@ -1,8 +1,10 @@
{% from "macros.html" import shorten_string %}
<script type="text/javascript"> <script type="text/javascript">
new DataTable('#faviconDetailsTable', { new DataTable('#faviconDetailsTable', {
columnDefs: [{ width: '40%', targets: 0 }, columnDefs: [{ width: '30%', targets: 0 },
{ width: '40%', targets: 1 }, { width: '30%', targets: 1 },
{ width: '20%', targets: 2 }], { width: '50%', targets: 2 }],
}); });
</script> </script>
@ -12,24 +14,26 @@
<table id="faviconDetailsTable" class="table table-striped" style="width:100%"> <table id="faviconDetailsTable" class="table table-striped" style="width:100%">
<thead> <thead>
<tr> <tr>
<th>Capture Time</th>
<th>Capture Title</th> <th>Capture Title</th>
<th>Landing page</th> <th>Landing page</th>
<th>Capture Time</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for capture_uuid, title, landing_page, capture_time in captures %} {% for capture_uuid, title, landing_page, capture_time in captures %}
<tr> <tr>
<td>
{{capture_time}}
</td>
<td> <td>
<a href="{{ url_for('tree', tree_uuid=capture_uuid) }}"> <a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">
{{ title }} {{ title }}
</a> </a>
</td> </td>
<td> <td>
{{landing_page}} <span class="d-inline-block text-truncate" style="max-width: 400px;">
</td> {{ shorten_string(landing_page, 300) }}
<td> </span>
{{capture_time}}
</td> </td>
</tr> </tr>
{% endfor %} {% endfor %}

View File

@ -538,7 +538,7 @@
<div class="modal-dialog modal-xl" role="document"> <div class="modal-dialog modal-xl" role="document">
<div class="modal-content"> <div class="modal-content">
<div class="modal-header"> <div class="modal-header">
<h5 class="modal-title" id="faviconsModalLabel">Favicons in tree</h5> <h5 class="modal-title" id="faviconsModalLabel">Favicons found on the rendered page</h5>
<button type="button" class="btn btn-close" data-bs-dismiss="modal" aria-label="Close"></button> <button type="button" class="btn btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div> </div>
<div class="modal-body"> <div class="modal-body">

View File

@ -10,9 +10,9 @@
<table id="bodyHashesTable" class="table table-striped" style="width:100%"> <table id="bodyHashesTable" class="table table-striped" style="width:100%">
<thead> <thead>
<tr> <tr>
<th>File type</th>
<th>Captures total</th> <th>Captures total</th>
<th>Ressource</th> <th>File type</th>
<th>Ressource URL in capture</th>
<th>Hash (sha512)</th> <th>Hash (sha512)</th>
</tr> </tr>
</thead> </thead>
@ -20,6 +20,7 @@
{% for body_hash, info in body_hashes.items() %} {% for body_hash, info in body_hashes.items() %}
{% set icon_info = get_icon(info['node'].generic_type) %} {% set icon_info = get_icon(info['node'].generic_type) %}
<tr> <tr>
<td>{{ info['total_captures'] }}</td>
<td> <td>
<a href="{{ url_for('get_ressource', tree_uuid=tree_uuid, node_uuid=info['node'].uuid) }}"> <a href="{{ url_for('get_ressource', tree_uuid=tree_uuid, node_uuid=info['node'].uuid) }}">
<img src="{{ url_for('static', filename=icon_info['icon']) }}" alt="{{ icon_info['tooltip'] }}" <img src="{{ url_for('static', filename=icon_info['icon']) }}" alt="{{ icon_info['tooltip'] }}"
@ -34,7 +35,6 @@
/> />
</a> </a>
</td> </td>
<td>{{ info['total_captures'] }}</td>
<td><p class="text-break">{{ info['node'].name }}</p></td> <td><p class="text-break">{{ info['node'].name }}</p></td>
<td> <td>
<span class="d-inline-block text-truncate" style="max-width: 200px;"> <span class="d-inline-block text-truncate" style="max-width: 200px;">

View File

@ -1,31 +1,39 @@
<script type="text/javascript"> <script type="text/javascript">
new DataTable('#faviconsTable', { new DataTable('#faviconsTable', {
columnDefs: [{ width: '20%', targets: 0 }, columnDefs: [{ width: '10%', targets: 0 },
{ width: '40%', targets: 1 }, { width: '50%', targets: 1 },
{ width: '40%', targets: 2 }], { width: '40%', targets: 2 }],
}); });
</script> </script>
<h5 class="text-center">Click on the favicon to see the other captures it's been found in</h5>
<table id="faviconsTable" class="table table-striped" style="width:100%"> <table id="faviconsTable" class="table table-striped" style="width:100%">
<thead> <thead>
<tr> <tr>
<th>Favicon</th>
<th>Frequency</th>
<th>Number of captures</th> <th>Number of captures</th>
<th>Favicon</th>
<th>Download</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for favicon_sha512, freq, number_captures, mimetype, b64_favicon in favicons %} {% for favicon_sha512, freq, number_captures, mimetype, b64_favicon in favicons %}
<tr> <tr>
<td>{{ number_captures }}</td>
<td> <td>
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512) }}" <a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512) }}"
data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button"> data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button">
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"/> <img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
title="Click to see other captures with the same favicon"/>
</a> </a>
<button type="button" class="btn btn-light" onclick="downloadBase64File('{{mimetype}}', '{{b64_favicon}}', 'favicon.ico')">Download favicon</button> <br>
</td>
<td>
<button type="button" class="btn btn-light" onclick="downloadBase64File('{{mimetype}}', '{{b64_favicon}}', 'favicon.ico')">
<img src="{{ url_for('static', filename='download.svg') }}" style="width:16px;height:16px;"
title="Download the favicon"/>
</button>
</td> </td>
<td>{{ freq }}</td>
<td>{{ number_captures }}</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>