new: Add shodan hash on favicon views

pull/888/head
Raphaël Vinot 2024-02-26 19:09:48 +01:00
parent decf887b63
commit 9e302a9b14
5 changed files with 24 additions and 16 deletions

View File

@ -33,7 +33,8 @@ class BackgroundIndexer(AbstractManager):
all_done = self._build_missing_pickles()
if all_done:
self._check_indexes()
self._check_probabilistic_indexes()
# Disable probabilistic indexing for now, mmh3 isn't a fuzzy hash algo.
# self._check_probabilistic_indexes()
self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name)
def _build_missing_pickles(self) -> bool:

View File

@ -23,7 +23,7 @@ from urllib.parse import urlparse
from uuid import uuid4
from zipfile import ZipFile
import magic
import mmh3
from defang import defang # type: ignore[import-untyped]
from har2tree import CrawledTree, HostNode, URLNode
@ -865,8 +865,7 @@ class Lookyloo():
fav = self._get_raw(capture_uuid, 'potential_favicons.ico', all_favicons)
if not all_favicons and for_datauri:
favicon = fav.getvalue()
f = magic.Magic(mime=True)
mimetype = f.from_buffer(favicon)
mimetype = from_string(favicon, mime=True)
return mimetype, base64.b64encode(favicon).decode()
return fav
@ -1056,11 +1055,15 @@ class Lookyloo():
for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
return captures, domains
def compute_mmh3_shodan(self, favicon: bytes, /) -> str:
    '''Compute the MurmurHash3 value displayed as "Shodan MMH3 Hash" in the favicon views.

    The raw favicon bytes are first base64-encoded with `base64.encodebytes`
    (the MIME-style encoder, not `b64encode`), and that encoded blob is what
    gets passed to `mmh3.hash`. The resulting value is returned as a string.

    :param favicon: raw content of the favicon.
    :return: the hash, converted to `str`.
    '''
    return str(mmh3.hash(base64.encodebytes(favicon)))
def get_favicon_investigator(self, favicon_sha512: str,
/,
get_probabilistic=True) -> tuple[list[tuple[str, str, str, datetime]],
tuple[str, str],
dict[str, dict[str, dict[str, tuple[str, str]]]]]:
get_probabilistic: bool=False) -> tuple[list[tuple[str, str, str, datetime]],
tuple[str, str, str],
dict[str, dict[str, dict[str, tuple[str, str]]]]]:
'''Returns all the captures related to a favicon (identified by its SHA512), used in the web interface.'''
cached_captures = self.sorted_capture_cache([uuid for uuid in self.indexing.get_captures_favicon(favicon_sha512)])
captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
@ -1068,6 +1071,7 @@ class Lookyloo():
if favicon:
mimetype = from_string(favicon, mime=True)
b64_favicon = base64.b64encode(favicon).decode()
mmh3_shodan = self.compute_mmh3_shodan(favicon)
else:
mimetype = ''
b64_favicon = ''
@ -1095,7 +1099,7 @@ class Lookyloo():
if not probabilistic_favicons[algo]:
# remove entry if it has no hash
probabilistic_favicons.pop(algo)
return captures, (mimetype, b64_favicon), probabilistic_favicons
return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons
def get_hhh_investigator(self, hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
'''Returns all the captures related to a cookie name entry, used in the web interface.'''

View File

@ -852,7 +852,8 @@ def tree_favicons(tree_uuid: str) -> str:
frequency = lookyloo.indexing.favicon_frequency(favicon_sha512)
number_captures = lookyloo.indexing.favicon_number_captures(favicon_sha512)
b64_favicon = base64.b64encode(favicon).decode()
favicons.append((favicon_sha512, frequency, number_captures, mimetype, b64_favicon))
mmh3_shodan = lookyloo.compute_mmh3_shodan(favicon)
favicons.append((favicon_sha512, frequency, number_captures, mimetype, b64_favicon, mmh3_shodan))
return render_template('tree_favicons.html', tree_uuid=tree_uuid, favicons=favicons)
@ -1251,12 +1252,12 @@ def hhh_detail(hhh: str) -> str:
@app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
@app.route('/favicon_details/<string:favicon_sha512>/<int:get_probabilistic>', methods=['GET'])
def favicon_detail(favicon_sha512: str, get_probabilistic: int=1) -> str:
def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str:
_get_prob = bool(get_probabilistic)
captures, favicon, probabilistic_favicons = lookyloo.get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob)
mimetype, b64_favicon = favicon
mimetype, b64_favicon, mmh3_shodan = favicon
return render_template('favicon_details.html', favicon_sha512=favicon_sha512,
captures=captures, mimetype=mimetype, b64_favicon=b64_favicon,
captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, mmh3_shodan=mmh3_shodan,
probabilistic_favicons=probabilistic_favicons)

View File

@ -9,7 +9,8 @@
</script>
<center>
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:64px;height:64px;"/>
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"/>
<h5>Shodan MMH3 Hash: {{ mmh3_shodan }}</h5>
</center>
<table id="faviconDetailsTable_{{favicon_sha512}}" class="table table-striped" style="width:100%">
<thead>

View File

@ -13,21 +13,22 @@
<tr>
<th>Number of captures</th>
<th>Favicon</th>
<th>Shodan MMH3</th>
<th>Download</th>
</tr>
</thead>
<tbody>
{% for favicon_sha512, freq, number_captures, mimetype, b64_favicon in favicons %}
{% for favicon_sha512, freq, number_captures, mimetype, b64_favicon, mmh3_shodan in favicons %}
<tr>
<td>{{ number_captures }}</td>
<td>
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512) }}"
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512, get_probabilistic=0) }}"
data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button">
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
title="Click to see other captures with the same favicon"/>
</a>
<br>
</td>
<td>{{ mmh3_shodan }}</td>
<td>
<button type="button" class="btn btn-light" onclick="downloadBase64File('{{mimetype}}', '{{b64_favicon}}', 'favicon.ico')">
<img src="{{ url_for('static', filename='download.svg') }}" style="width:16px;height:16px;"