mirror of https://github.com/CIRCL/lookyloo
chg: remove refs to probabilistic favicons index
It was neither probabilistic, nor used.
pull/981/head
parent
7eece6b98f
commit
ed16939790
|
@ -2,15 +2,10 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import logging
|
||||
|
||||
from io import BytesIO
|
||||
from datetime import datetime, timedelta
|
||||
from zipfile import ZipFile
|
||||
|
||||
import mmh3
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
@ -845,56 +840,6 @@ class Indexing():
|
|||
def get_captures_identifier_count(self, identifier_type: str, identifier: str) -> int:
    '''Cardinality of the sorted set of captures stored for this identifier of this type.'''
    captures_key = f'identifiers|{identifier_type}|{identifier}|captures'
    return self.redis.zcard(captures_key)
|
||||
|
||||
# ###### favicons probabilistic hashes ######
|
||||
|
||||
def favicon_probabilistic_frequency(self, algorithm: str, phash: str) -> float | None:
    '''Score of this probabilistic hash in the sorted set of this algorithm.

    The value is whatever redis returns for ZSCORE on the `favicons|<algorithm>` key.
    '''
    ranking_key = f'favicons|{algorithm}'
    return self.redis.zscore(ranking_key, phash)
|
||||
|
||||
def index_favicons_probabilistic(self, capture_uuid: str, favicons: BytesIO, algorithm: str) -> None:
    '''Index the favicons of a capture by probabilistic hash.

    :param capture_uuid: UUID of the capture the favicons come from.
    :param favicons: Zip archive; only the non-empty members whose name ends with `.ico` are indexed.
    :param algorithm: Name of the hashing algorithm, only `mmh3-shodan` is implemented.
    :raises NotImplementedError: If the algorithm is unknown. Nothing is written to redis in that case.
    '''
    # FIXME: this method isn't used anymore
    if algorithm != 'mmh3-shodan':
        # Validate before touching redis: otherwise the capture gets flagged as
        # indexed for this algorithm even though none of its favicons were stored.
        raise NotImplementedError(f'Unknown algorithm: {algorithm}')

    if self.redis.sismember(f'indexed_favicons_probabilistic|{algorithm}', capture_uuid):
        # Do not reindex
        return
    self.redis.sadd(f'indexed_favicons_probabilistic|{algorithm}', capture_uuid)

    pipeline = self.redis.pipeline()
    with ZipFile(favicons, 'r') as myzip:
        for name in myzip.namelist():
            if not name.endswith('.ico'):
                continue
            favicon = myzip.read(name)
            if not favicon:
                # Empty file, ignore.
                continue
            sha = hashlib.sha512(favicon).hexdigest()
            # Shodan uses a weird technique:
            # 1. encodes the image to base64, with newlines every 76 characters (as per RFC 2045)
            # 2. hashes the base64 string with mmh3
            b64 = base64.encodebytes(favicon)
            h = str(mmh3.hash(b64))
            pipeline.zincrby(f'favicons|{algorithm}', 1, h)
            # All captures with this hash for this algorithm
            pipeline.sadd(f'favicons|{algorithm}|{h}|captures', capture_uuid)
            # All favicon SHA512 with this hash for this algorithm
            pipeline.sadd(f'favicons|{algorithm}|{h}|favicons', sha)
            # reverse lookup to get probabilistic hashes related to a specific favicon
            pipeline.sadd(f'favicons|{algorithm}|{sha}', h)
    pipeline.execute()
|
||||
|
||||
def get_hashes_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]:
    '''Members of the set of favicon SHA512 stored for this probabilistic hash and algorithm.'''
    favicons_key = f'favicons|{algorithm}|{phash}|favicons'
    return self.redis.smembers(favicons_key)
|
||||
|
||||
def get_probabilistic_hashes_favicon(self, algorithm: str, favicon_sha512: str) -> set[str]:
    '''Members of the reverse-lookup set: probabilistic hashes stored for this favicon SHA512 and algorithm.'''
    reverse_key = f'favicons|{algorithm}|{favicon_sha512}'
    return self.redis.smembers(reverse_key)
|
||||
|
||||
def get_captures_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]:
    '''Members of the set of capture UUIDs stored for this probabilistic hash and algorithm.'''
    captures_key = f'favicons|{algorithm}|{phash}|captures'
    return self.redis.smembers(captures_key)
|
||||
|
||||
# ###### Categories ######
|
||||
|
||||
@property
|
||||
|
|
|
@ -443,10 +443,8 @@ def get_capture_hash_investigator(hash_type: str, h: str) -> list[tuple[str, str
|
|||
|
||||
|
||||
def get_favicon_investigator(favicon_sha512: str,
|
||||
/,
|
||||
get_probabilistic: bool=False) -> tuple[list[tuple[str, str, str, datetime]],
|
||||
tuple[str, str, str],
|
||||
dict[str, dict[str, dict[str, tuple[str, str]]]]]:
|
||||
/) -> tuple[list[tuple[str, str, str, datetime]],
|
||||
tuple[str, str, str]]:
|
||||
'''Returns all the captures related to a favicon (by SHA512), used in the web interface.'''
|
||||
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)])
|
||||
captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
|
||||
|
@ -460,30 +458,7 @@ def get_favicon_investigator(favicon_sha512: str,
|
|||
b64_favicon = ''
|
||||
mmh3_shodan = ''
|
||||
|
||||
# For now, there is only one probabilistic hash algo for favicons, keeping it simple
|
||||
probabilistic_hash_algos = ['mmh3-shodan']
|
||||
probabilistic_favicons: dict[str, dict[str, dict[str, tuple[str, str]]]] = {}
|
||||
if get_probabilistic:
|
||||
for algo in probabilistic_hash_algos:
|
||||
probabilistic_favicons[algo] = {}
|
||||
for mm3hash in get_indexing(flask_login.current_user).get_probabilistic_hashes_favicon(algo, favicon_sha512):
|
||||
probabilistic_favicons[algo][mm3hash] = {}
|
||||
for sha512 in get_indexing(flask_login.current_user).get_hashes_favicon_probablistic(algo, mm3hash):
|
||||
if sha512 == favicon_sha512:
|
||||
# Skip entry if it is the same as the favicon we are investigating
|
||||
continue
|
||||
favicon = get_indexing(flask_login.current_user).get_favicon(sha512)
|
||||
if favicon:
|
||||
mimetype = from_string(favicon, mime=True)
|
||||
b64_favicon = base64.b64encode(favicon).decode()
|
||||
probabilistic_favicons[algo][mm3hash][sha512] = (mimetype, b64_favicon)
|
||||
if not probabilistic_favicons[algo][mm3hash]:
|
||||
# remove entry if it has no favicon
|
||||
probabilistic_favicons[algo].pop(mm3hash)
|
||||
if not probabilistic_favicons[algo]:
|
||||
# remove entry if it has no hash
|
||||
probabilistic_favicons.pop(algo)
|
||||
return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons
|
||||
return captures, (mimetype, b64_favicon, mmh3_shodan)
|
||||
|
||||
|
||||
def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
|
||||
|
@ -1787,14 +1762,12 @@ def capture_hash_details(hash_type: str, h: str) -> str:
|
|||
|
||||
|
||||
@app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
|
||||
@app.route('/favicon_details/<string:favicon_sha512>/<int:get_probabilistic>', methods=['GET'])
|
||||
def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str:
|
||||
_get_prob = bool(get_probabilistic)
|
||||
captures, favicon, probabilistic_favicons = get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob)
|
||||
def favicon_detail(favicon_sha512: str) -> str:
|
||||
captures, favicon = get_favicon_investigator(favicon_sha512.strip())
|
||||
mimetype, b64_favicon, mmh3_shodan = favicon
|
||||
return render_template('favicon_details.html', favicon_sha512=favicon_sha512,
|
||||
captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, mmh3_shodan=mmh3_shodan,
|
||||
probabilistic_favicons=probabilistic_favicons)
|
||||
captures=captures, mimetype=mimetype, b64_favicon=b64_favicon,
|
||||
mmh3_shodan=mmh3_shodan)
|
||||
|
||||
|
||||
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
||||
|
|
|
@ -48,18 +48,3 @@
|
|||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
{# One section per probabilistic hash algorithm, one sub-section per hash. #}
{% for probabilistic_hash_algo, entries in probabilistic_favicons.items() %}
  <h3>Probabilistic Favicon Hashes ({{ probabilistic_hash_algo }})</h3>
  {% for mm3h, favicons in entries.items() %}
    <h4>MM3 Hash: {{ mm3h }}</h4>
    {# Each value is the (mimetype, base64 content) tuple of that specific favicon:
       render it instead of the outer mimetype/b64_favicon of the investigated one. #}
    {% for sha512, favicon in favicons.items() %}
      <a href="#faviconDetailsProbabilisticHashModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=sha512, get_probabilistic=0) }}"
         data-bs-toggle="modal" data-bs-target="#faviconDetailsProbabilisticHashModal" role="button">
        <img src="data:{{ favicon[0] }};base64,{{ favicon[1] }}" style="width:32px;height:32px;"
             title="Click to see other captures with the same favicon"/>
      </a>
      <br>
    {% endfor %}
  {% endfor %}
{% endfor %}
|
||||
|
|
|
@ -32,7 +32,7 @@ if (downloadFavicons) {
|
|||
<tr>
|
||||
<td>{{ number_captures }}</td>
|
||||
<td>
|
||||
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512, get_probabilistic=0) }}"
|
||||
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512) }}"
|
||||
data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button">
|
||||
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
|
||||
title="Click to see other captures with the same favicon"/>
|
||||
|
|
Loading…
Reference in New Issue