new: Many improvements when correlating resources

pull/887/head
Raphaël Vinot 2024-02-21 14:36:19 +01:00
parent 104129bbe2
commit 391166aec7
8 changed files with 94 additions and 49 deletions

View File

@ -201,7 +201,7 @@ class Indexing():
def get_body_hash_captures(self, body_hash: str, filter_url: str | None=None,
filter_capture_uuid: str | None=None,
limit: int=20,
prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool]]]:
prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool, str]]]:
'''Get the captures matching the hash.
:param filter_url: URL of the hash we're searching for
@ -209,7 +209,7 @@ class Indexing():
:param limit: Max matching captures to return, -1 means unlimited.
:param prefered_uuids: UUID cached right now, so we don't rebuild trees.
'''
to_return: list[tuple[str, str, str, bool]] = []
to_return: list[tuple[str, str, str, bool, str]] = []
len_captures = self.redis.scard(f'bh|{body_hash}|captures')
unlimited = False
if limit == -1:
@ -227,9 +227,9 @@ class Indexing():
url_uuid, hostnode_uuid, url = entry.split('|', 2)
hostname: str = urlsplit(url).hostname
if filter_url:
to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url))
to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url, url))
else:
to_return.append((capture_uuid, hostnode_uuid, hostname, False))
to_return.append((capture_uuid, hostnode_uuid, hostname, False, url))
if not unlimited and limit <= 0:
break
return len_captures, to_return

View File

@ -927,11 +927,15 @@ class Lookyloo():
return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page)
- set(ct.root_hartree.all_url_requests.keys()))
def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float]]]:
def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]:
'''Returns all the captures related to a hash (sha512), used in the web interface.'''
total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1)
cached_captures = self.sorted_capture_cache([d[0] for d in details])
captures = [(cache.uuid, cache.title) for cache in cached_captures]
captures = []
for capture_uuid, hostnode_uuid, hostname, _, url in details:
cache = self.capture_cache(capture_uuid)
if not cache:
continue
captures.append((cache.uuid, cache.title, cache.timestamp, hostnode_uuid, url))
domains = self.indexing.get_body_hash_domains(body_hash)
return captures, domains
@ -1079,7 +1083,7 @@ class Lookyloo():
captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
prefered_uuids=set(self._captures_index.keys()))
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
for h_capture_uuid, url_uuid, url_hostname, same_url, url in details:
cache = self.capture_cache(h_capture_uuid)
if cache and hasattr(cache, 'title'):
if same_url:

View File

@ -1052,6 +1052,13 @@ def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse:
return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
@app.route('/ressource_by_hash/<string:sha512>', methods=['GET'])
@file_response  # type: ignore[misc]
def ressource_by_hash(sha512: str) -> Response:
    '''Send the body matching a hash as a file download.

    :param sha512: Hash taken from the URL, forwarded to lookyloo.get_body_hash_full.
    '''
    # get_body_hash_full returns a (details, body) pair; only the body is sent,
    # so the first element is discarded instead of being bound to an unused name.
    _, body = lookyloo.get_body_hash_full(sha512)
    return send_file(body, as_attachment=True, download_name='ressource.bin')
# ################## Submit existing capture ##################
@app.route('/submit_capture', methods=['GET', 'POST'])

View File

@ -1,3 +1,5 @@
{% from "macros.html" import shorten_string %}
{% if from_popup %}
{% extends "main.html" %}
@ -23,50 +25,70 @@
{% endblock %}
{%endif%}
{% block content %}
{% if from_popup %}
<button onclick="window.history.back();" class="btn btn-primary" type="button">Go Back</button>
{%endif%}
<center>
<h6>{{ body_hash }}</h6>
<a href="{{ url_for('ressource_by_hash', sha512=body_hash) }}">Download</a>
</center>
<script type="text/javascript">
new DataTable('#bodyHashDetailsTable', {
new DataTable('#freqHostTable', {
columnDefs: [{ width: '20%', targets: 0 },
{ width: '80%', targets: 1 }],
});
</script>
<center>
<h4>{{ body_hash }}</h4>
</center>
<table id="bodyHashDetailsTable" class="table table-striped" style="width:100%">
<script type="text/javascript">
new DataTable('#bodyHashDetailsTable', {
columnDefs: [{ width: '20%', targets: 0 },
{ width: '40%', targets: 1 },
{ width: '40%', targets: 2 }],
});
</script>
<table id="freqHostTable" class="table table-striped" style="width:100%">
<thead>
<tr>
<th>Hostname</th>
<th>Frequency</th>
<th>Hostname</th>
</tr>
</thead>
<tbody>
{% for domain, freq in domains %}
<tr>
<td>
{{ domain }}
</td>
<td>{{ freq }}</td>
<td>{{ domain }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<p>The same file was seen in these captures:</p>
<ul>
{% for capture_uuid, title in captures %}
<li>
{% if from_popup %}
<a href="#/" onclick="openTreeInNewTab('{{ capture_uuid }}')">{{ title }}</a>
{% else %}
<a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">{{ title }}</a>
{% endif %}
</li>
{% endfor %}
</ul>
<table id="bodyHashDetailsTable" class="table table-striped" style="width:100%">
<thead>
<tr>
<th>Timestamp</th>
<th>Title</th>
<th>URL</th>
</tr>
</thead>
<tbody>
{% for capture_uuid, title, timestamp, hostnode_uuid, url in captures %}
<tr>
<td>{{ timestamp }}</td>
<td>
{% if from_popup %}
<a href="#/" onclick="openTreeInNewTab('{{ capture_uuid }}')">{{ title }}</a>
{% else %}
<a href="{{ url_for('tree', tree_uuid=capture_uuid, node_uuid=hostnode_uuid) }}">{{ title }}</a>
{% endif %}
</td>
<td>{{ url }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endblock %}

View File

@ -1,8 +1,10 @@
{% from "macros.html" import shorten_string %}
<script type="text/javascript">
new DataTable('#faviconDetailsTable', {
columnDefs: [{ width: '40%', targets: 0 },
{ width: '40%', targets: 1 },
{ width: '20%', targets: 2 }],
columnDefs: [{ width: '30%', targets: 0 },
{ width: '30%', targets: 1 },
{ width: '50%', targets: 2 }],
});
</script>
@ -12,24 +14,26 @@
<table id="faviconDetailsTable" class="table table-striped" style="width:100%">
<thead>
<tr>
<th>Capture Time</th>
<th>Capture Title</th>
<th>Landing page</th>
<th>Capture Time</th>
</tr>
</thead>
<tbody>
{% for capture_uuid, title, landing_page, capture_time in captures %}
<tr>
<td>
{{capture_time}}
</td>
<td>
<a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">
{{ title }}
</a>
</td>
<td>
{{landing_page}}
</td>
<td>
{{capture_time}}
<span class="d-inline-block text-truncate" style="max-width: 400px;">
{{ shorten_string(landing_page, 300) }}
</span>
</td>
</tr>
{% endfor %}

View File

@ -538,7 +538,7 @@
<div class="modal-dialog modal-xl" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="faviconsModalLabel">Favicons in tree</h5>
<h5 class="modal-title" id="faviconsModalLabel">Favicons found on the rendered page</h5>
<button type="button" class="btn btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<div class="modal-body">

View File

@ -10,9 +10,9 @@
<table id="bodyHashesTable" class="table table-striped" style="width:100%">
<thead>
<tr>
<th>File type</th>
<th>Captures total</th>
<th>Ressource</th>
<th>File type</th>
<th>Ressource URL in capture</th>
<th>Hash (sha512)</th>
</tr>
</thead>
@ -20,6 +20,7 @@
{% for body_hash, info in body_hashes.items() %}
{% set icon_info = get_icon(info['node'].generic_type) %}
<tr>
<td>{{ info['total_captures'] }}</td>
<td>
<a href="{{ url_for('get_ressource', tree_uuid=tree_uuid, node_uuid=info['node'].uuid) }}">
<img src="{{ url_for('static', filename=icon_info['icon']) }}" alt="{{ icon_info['tooltip'] }}"
@ -34,7 +35,6 @@
/>
</a>
</td>
<td>{{ info['total_captures'] }}</td>
<td><p class="text-break">{{ info['node'].name }}</p></td>
<td>
<span class="d-inline-block text-truncate" style="max-width: 200px;">

View File

@ -1,31 +1,39 @@
<script type="text/javascript">
new DataTable('#faviconsTable', {
columnDefs: [{ width: '20%', targets: 0 },
{ width: '40%', targets: 1 },
columnDefs: [{ width: '10%', targets: 0 },
{ width: '50%', targets: 1 },
{ width: '40%', targets: 2 }],
});
</script>
<h5 class="text-center">Click on the favicon to see the other captures it's been found in</h5>
<table id="faviconsTable" class="table table-striped" style="width:100%">
<thead>
<tr>
<th>Favicon</th>
<th>Frequency</th>
<th>Number of captures</th>
<th>Favicon</th>
<th>Download</th>
</tr>
</thead>
<tbody>
{% for favicon_sha512, freq, number_captures, mimetype, b64_favicon in favicons %}
<tr>
<td>{{ number_captures }}</td>
<td>
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512) }}"
data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button">
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"/>
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
title="Click to see other captures with the same favicon"/>
</a>
<button type="button" class="btn btn-light" onclick="downloadBase64File('{{mimetype}}', '{{b64_favicon}}', 'favicon.ico')">Download favicon</button>
<br>
</td>
<td>
<button type="button" class="btn btn-light" onclick="downloadBase64File('{{mimetype}}', '{{b64_favicon}}', 'favicon.ico')">
<img src="{{ url_for('static', filename='download.svg') }}" style="width:16px;height:16px;"
title="Download the favicon"/>
</button>
</td>
<td>{{ freq }}</td>
<td>{{ number_captures }}</td>
</tr>
{% endfor %}
</tbody>