From ed16939790f7e436fe967285cb15c424514d2070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Thu, 31 Oct 2024 15:18:39 +0100 Subject: [PATCH] chg: remove refs to probabilistic favicons index It was neither probabilistic, nor used. --- lookyloo/indexing.py | 55 ---------------------- website/web/__init__.py | 41 +++------------- website/web/templates/favicon_details.html | 15 ------ website/web/templates/tree_favicons.html | 2 +- 4 files changed, 8 insertions(+), 105 deletions(-) diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py index 0dfcbd9c..2672b079 100644 --- a/lookyloo/indexing.py +++ b/lookyloo/indexing.py @@ -2,15 +2,10 @@ from __future__ import annotations -import base64 import hashlib import logging -from io import BytesIO from datetime import datetime, timedelta -from zipfile import ZipFile - -import mmh3 from pathlib import Path @@ -845,56 +840,6 @@ class Indexing(): def get_captures_identifier_count(self, identifier_type: str, identifier: str) -> int: return self.redis.zcard(f'identifiers|{identifier_type}|{identifier}|captures') - # ###### favicons probabilistic hashes ###### - - def favicon_probabilistic_frequency(self, algorithm: str, phash: str) -> float | None: - return self.redis.zscore(f'favicons|{algorithm}', phash) - - def index_favicons_probabilistic(self, capture_uuid: str, favicons: BytesIO, algorithm: str) -> None: - # FIXME: this method isnt used anymore - if self.redis.sismember(f'indexed_favicons_probabilistic|{algorithm}', capture_uuid): - # Do not reindex - return - self.redis.sadd(f'indexed_favicons_probabilistic|{algorithm}', capture_uuid) - pipeline = self.redis.pipeline() - with ZipFile(favicons, 'r') as myzip: - for name in myzip.namelist(): - if not name.endswith('.ico'): - continue - favicon = myzip.read(name) - if not favicon: - # Empty file, ignore. - continue - sha = hashlib.sha512(favicon).hexdigest() - if algorithm == 'mmh3-shodan': - # Shodan uses a weird technique: - # 1. encodes the image to base64, with newlines every 76 characters (as per RFC 2045) - # 2. hashes the base64 string with mmh3 - b64 = base64.encodebytes(favicon) - h = str(mmh3.hash(b64)) - else: - raise NotImplementedError(f'Unknown algorithm: {algorithm}') - pipeline.zincrby(f'favicons|{algorithm}', 1, h) - # All captures with this hash for this algorithm - pipeline.sadd(f'favicons|{algorithm}|{h}|captures', capture_uuid) - # All hashes with this hash for this algorithm - pipeline.sadd(f'favicons|{algorithm}|{h}|favicons', sha) - # reverse lookup to get probabilistic hashes related to a specific favicon - pipeline.sadd(f'favicons|{algorithm}|{sha}', h) - pipeline.execute() - - def get_hashes_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]: - '''All the favicon sha512 for this probabilistic hash for this algorithm''' - return self.redis.smembers(f'favicons|{algorithm}|{phash}|favicons') - - def get_probabilistic_hashes_favicon(self, algorithm: str, favicon_sha512: str) -> set[str]: - '''All the probabilistic hashes for this favicon SHA512 for this algorithm''''' - return self.redis.smembers(f'favicons|{algorithm}|{favicon_sha512}') - - def get_captures_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]: - '''All the captures with this probabilistic hash for this algorithm''' - return self.redis.smembers(f'favicons|{algorithm}|{phash}|captures') - # ###### Categories ###### @property diff --git a/website/web/__init__.py b/website/web/__init__.py index a4a5162d..c207ad88 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -443,10 +443,8 @@ def get_capture_hash_investigator(hash_type: str, h: str) -> list[tuple[str, str def get_favicon_investigator(favicon_sha512: str, - /, - get_probabilistic: bool=False) -> tuple[list[tuple[str, str, str, datetime]], - tuple[str, str, str], - dict[str, dict[str, dict[str, tuple[str, str]]]]]: + /) -> tuple[list[tuple[str, str, str, datetime]], + tuple[str, str, str]]: '''Returns all the captures related to a cookie name entry, used in the web interface.''' cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)]) captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures] @@ -460,30 +458,7 @@ def get_favicon_investigator(favicon_sha512: str, b64_favicon = '' mmh3_shodan = '' - # For now, there is only one probabilistic hash algo for favicons, keeping it simple - probabilistic_hash_algos = ['mmh3-shodan'] - probabilistic_favicons: dict[str, dict[str, dict[str, tuple[str, str]]]] = {} - if get_probabilistic: - for algo in probabilistic_hash_algos: - probabilistic_favicons[algo] = {} - for mm3hash in get_indexing(flask_login.current_user).get_probabilistic_hashes_favicon(algo, favicon_sha512): - probabilistic_favicons[algo][mm3hash] = {} - for sha512 in get_indexing(flask_login.current_user).get_hashes_favicon_probablistic(algo, mm3hash): - if sha512 == favicon_sha512: - # Skip entry if it is the same as the favicon we are investigating - continue - favicon = get_indexing(flask_login.current_user).get_favicon(sha512) - if favicon: - mimetype = from_string(favicon, mime=True) - b64_favicon = base64.b64encode(favicon).decode() - probabilistic_favicons[algo][mm3hash][sha512] = (mimetype, b64_favicon) - if not probabilistic_favicons[algo][mm3hash]: - # remove entry if it has no favicon - probabilistic_favicons[algo].pop(mm3hash) - if not probabilistic_favicons[algo]: - # remove entry if it has no hash - probabilistic_favicons.pop(algo) - return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons + return captures, (mimetype, b64_favicon, mmh3_shodan) def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]: @@ -1787,14 +1762,12 @@ def capture_hash_details(hash_type: str, h: str) -> str: @app.route('/favicon_details/', methods=['GET']) -@app.route('/favicon_details//', methods=['GET']) -def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str: - _get_prob = bool(get_probabilistic) - captures, favicon, probabilistic_favicons = get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob) +def favicon_detail(favicon_sha512: str) -> str: + captures, favicon = get_favicon_investigator(favicon_sha512.strip()) mimetype, b64_favicon, mmh3_shodan = favicon return render_template('favicon_details.html', favicon_sha512=favicon_sha512, - captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, mmh3_shodan=mmh3_shodan, - probabilistic_favicons=probabilistic_favicons) + captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, + mmh3_shodan=mmh3_shodan) @app.route('/body_hashes/', methods=['GET']) diff --git a/website/web/templates/favicon_details.html b/website/web/templates/favicon_details.html index 1716d5c9..60682a2f 100644 --- a/website/web/templates/favicon_details.html +++ b/website/web/templates/favicon_details.html @@ -48,18 +48,3 @@ {% endfor %} - -{%for probabilistic_hash_algo, entries in probabilistic_favicons.items() %} -

Probabilistic Favicon Hashes ({{ probabilistic_hash_algo }})

- {% for mm3h, favicons in entries.items() %} -

MM3 Hash: {{ mm3h }}

- {% for sha512, favicon in favicons.items() %} - - - -
- {% endfor %} - {% endfor %} -{% endfor %} diff --git a/website/web/templates/tree_favicons.html b/website/web/templates/tree_favicons.html index bc2048f9..a38acb86 100644 --- a/website/web/templates/tree_favicons.html +++ b/website/web/templates/tree_favicons.html @@ -32,7 +32,7 @@ if (downloadFavicons) { {{ number_captures }} -