chg: remove refs to probabilistic favicons index

It was neither probabilistic, nor used.
pull/981/head
Raphaël Vinot 2024-10-31 15:18:39 +01:00
parent 7eece6b98f
commit ed16939790
4 changed files with 8 additions and 105 deletions

View File

@ -2,15 +2,10 @@
from __future__ import annotations from __future__ import annotations
import base64
import hashlib import hashlib
import logging import logging
from io import BytesIO
from datetime import datetime, timedelta from datetime import datetime, timedelta
from zipfile import ZipFile
import mmh3
from pathlib import Path from pathlib import Path
@ -845,56 +840,6 @@ class Indexing():
def get_captures_identifier_count(self, identifier_type: str, identifier: str) -> int: def get_captures_identifier_count(self, identifier_type: str, identifier: str) -> int:
return self.redis.zcard(f'identifiers|{identifier_type}|{identifier}|captures') return self.redis.zcard(f'identifiers|{identifier_type}|{identifier}|captures')
# ###### favicons probabilistic hashes ######
def favicon_probabilistic_frequency(self, algorithm: str, phash: str) -> float | None:
    '''Score stored for this probabilistic hash in the `favicons|<algorithm>` sorted set.

    The score is looked up with ZSCORE; the annotation allows None for a hash
    that has no entry in the sorted set.
    '''
    return self.redis.zscore(f'favicons|{algorithm}', phash)
def index_favicons_probabilistic(self, capture_uuid: str, favicons: BytesIO, algorithm: str) -> None:
    '''Index every non-empty `.ico` entry of a zip archive under its probabilistic hash.

    :param capture_uuid: UUID of the capture the favicons belong to.
    :param favicons: in-memory zip archive; only members whose name ends with `.ico` are read.
    :param algorithm: hashing scheme, only `mmh3-shodan` is implemented.
    :raises NotImplementedError: if the algorithm is unknown. Raised before anything
        is written, so a bad algorithm name never marks the capture as indexed.
    '''
    # FIXME: this method isn't used anymore
    if algorithm != 'mmh3-shodan':
        raise NotImplementedError(f'Unknown algorithm: {algorithm}')

    # SADD returns the number of members actually added: 0 means the capture
    # was already indexed. Test and mark happen in a single command.
    if not self.redis.sadd(f'indexed_favicons_probabilistic|{algorithm}', capture_uuid):
        # Do not reindex
        return

    pipeline = self.redis.pipeline()
    with ZipFile(favicons, 'r') as myzip:
        for name in myzip.namelist():
            if not name.endswith('.ico'):
                continue
            favicon = myzip.read(name)
            if not favicon:
                # Empty file, ignore.
                continue
            sha = hashlib.sha512(favicon).hexdigest()
            # Shodan uses a weird technique:
            # 1. encodes the image to base64, with newlines every 76 characters (as per RFC 2045)
            # 2. hashes the base64 string with mmh3
            b64 = base64.encodebytes(favicon)
            h = str(mmh3.hash(b64))
            pipeline.zincrby(f'favicons|{algorithm}', 1, h)
            # All captures with this hash for this algorithm
            pipeline.sadd(f'favicons|{algorithm}|{h}|captures', capture_uuid)
            # All favicon SHA512 with this probabilistic hash for this algorithm
            pipeline.sadd(f'favicons|{algorithm}|{h}|favicons', sha)
            # reverse lookup to get probabilistic hashes related to a specific favicon
            pipeline.sadd(f'favicons|{algorithm}|{sha}', h)
    pipeline.execute()
def get_hashes_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]:
    '''All the favicon SHA512 recorded for this probabilistic hash and algorithm.

    Reads the `favicons|<algorithm>|<phash>|favicons` set.
    NOTE: the method name is misspelled ("probablistic"); it is kept as is for callers.
    '''
    return self.redis.smembers(f'favicons|{algorithm}|{phash}|favicons')
def get_probabilistic_hashes_favicon(self, algorithm: str, favicon_sha512: str) -> set[str]:
    '''All the probabilistic hashes recorded for this favicon SHA512 and algorithm.

    Reads the reverse-lookup set `favicons|<algorithm>|<favicon_sha512>`.
    '''
    return self.redis.smembers(f'favicons|{algorithm}|{favicon_sha512}')
def get_captures_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]:
    '''All the capture UUIDs recorded for this probabilistic hash and algorithm.

    Reads the `favicons|<algorithm>|<phash>|captures` set.
    NOTE: the method name is misspelled ("probablistic"); it is kept as is for callers.
    '''
    return self.redis.smembers(f'favicons|{algorithm}|{phash}|captures')
# ###### Categories ###### # ###### Categories ######
@property @property

View File

@ -443,10 +443,8 @@ def get_capture_hash_investigator(hash_type: str, h: str) -> list[tuple[str, str
def get_favicon_investigator(favicon_sha512: str, def get_favicon_investigator(favicon_sha512: str,
/, /) -> tuple[list[tuple[str, str, str, datetime]],
get_probabilistic: bool=False) -> tuple[list[tuple[str, str, str, datetime]], tuple[str, str, str]]:
tuple[str, str, str],
dict[str, dict[str, dict[str, tuple[str, str]]]]]:
'''Returns all the captures related to a cookie name entry, used in the web interface.''' '''Returns all the captures related to a cookie name entry, used in the web interface.'''
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)]) cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)])
captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures] captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
@ -460,30 +458,7 @@ def get_favicon_investigator(favicon_sha512: str,
b64_favicon = '' b64_favicon = ''
mmh3_shodan = '' mmh3_shodan = ''
# For now, there is only one probabilistic hash algo for favicons, keeping it simple return captures, (mimetype, b64_favicon, mmh3_shodan)
probabilistic_hash_algos = ['mmh3-shodan']
probabilistic_favicons: dict[str, dict[str, dict[str, tuple[str, str]]]] = {}
if get_probabilistic:
for algo in probabilistic_hash_algos:
probabilistic_favicons[algo] = {}
for mm3hash in get_indexing(flask_login.current_user).get_probabilistic_hashes_favicon(algo, favicon_sha512):
probabilistic_favicons[algo][mm3hash] = {}
for sha512 in get_indexing(flask_login.current_user).get_hashes_favicon_probablistic(algo, mm3hash):
if sha512 == favicon_sha512:
# Skip entry if it is the same as the favicon we are investigating
continue
favicon = get_indexing(flask_login.current_user).get_favicon(sha512)
if favicon:
mimetype = from_string(favicon, mime=True)
b64_favicon = base64.b64encode(favicon).decode()
probabilistic_favicons[algo][mm3hash][sha512] = (mimetype, b64_favicon)
if not probabilistic_favicons[algo][mm3hash]:
# remove entry if it has no favicon
probabilistic_favicons[algo].pop(mm3hash)
if not probabilistic_favicons[algo]:
# remove entry if it has no hash
probabilistic_favicons.pop(algo)
return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons
def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]: def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
@ -1787,14 +1762,12 @@ def capture_hash_details(hash_type: str, h: str) -> str:
@app.route('/favicon_details/<string:favicon_sha512>', methods=['GET']) @app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
@app.route('/favicon_details/<string:favicon_sha512>/<int:get_probabilistic>', methods=['GET']) def favicon_detail(favicon_sha512: str) -> str:
def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str: captures, favicon = get_favicon_investigator(favicon_sha512.strip())
_get_prob = bool(get_probabilistic)
captures, favicon, probabilistic_favicons = get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob)
mimetype, b64_favicon, mmh3_shodan = favicon mimetype, b64_favicon, mmh3_shodan = favicon
return render_template('favicon_details.html', favicon_sha512=favicon_sha512, return render_template('favicon_details.html', favicon_sha512=favicon_sha512,
captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, mmh3_shodan=mmh3_shodan, captures=captures, mimetype=mimetype, b64_favicon=b64_favicon,
probabilistic_favicons=probabilistic_favicons) mmh3_shodan=mmh3_shodan)
@app.route('/body_hashes/<string:body_hash>', methods=['GET']) @app.route('/body_hashes/<string:body_hash>', methods=['GET'])

View File

@ -48,18 +48,3 @@
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
{# One section per probabilistic hash algorithm, one sub-section per hash value.
   Each `favicon` is the (mimetype, base64 payload) pair built by the view for that
   SHA512, so the image must be rendered from the loop variable and not from the
   page-level `mimetype` / `b64_favicon` of the favicon being investigated. #}
{% for probabilistic_hash_algo, entries in probabilistic_favicons.items() %}
<h3>Probabilistic Favicon Hashes ({{ probabilistic_hash_algo }})</h3>
  {% for mm3h, favicons in entries.items() %}
  <h4>MM3 Hash: {{ mm3h }}</h4>
    {% for sha512, favicon in favicons.items() %}
    <a href="#faviconDetailsProbabilisticHashModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=sha512, get_probabilistic=0) }}"
       data-bs-toggle="modal" data-bs-target="#faviconDetailsProbabilisticHashModal" role="button">
      <img src="data:{{ favicon[0] }};base64,{{ favicon[1] }}" style="width:32px;height:32px;"
           title="Click to see other captures with the same favicon"/>
    </a>
    <br>
    {% endfor %}
  {% endfor %}
{% endfor %}

View File

@ -32,7 +32,7 @@ if (downloadFavicons) {
<tr> <tr>
<td>{{ number_captures }}</td> <td>{{ number_captures }}</td>
<td> <td>
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512, get_probabilistic=0) }}" <a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512) }}"
data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button"> data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button">
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;" <img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
title="Click to see other captures with the same favicon"/> title="Click to see other captures with the same favicon"/>