mirror of https://github.com/CIRCL/lookyloo
chg: remove refs to probabilistic favicons index
It was neither probabilistic, nor used.
pull/981/head
parent
7eece6b98f
commit
ed16939790
|
@ -2,15 +2,10 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from io import BytesIO
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from zipfile import ZipFile
|
|
||||||
|
|
||||||
import mmh3
|
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
@ -845,56 +840,6 @@ class Indexing():
|
||||||
def get_captures_identifier_count(self, identifier_type: str, identifier: str) -> int:
|
def get_captures_identifier_count(self, identifier_type: str, identifier: str) -> int:
|
||||||
return self.redis.zcard(f'identifiers|{identifier_type}|{identifier}|captures')
|
return self.redis.zcard(f'identifiers|{identifier_type}|{identifier}|captures')
|
||||||
|
|
||||||
# ###### favicons probabilistic hashes ######
|
|
||||||
|
|
||||||
def favicon_probabilistic_frequency(self, algorithm: str, phash: str) -> float | None:
|
|
||||||
return self.redis.zscore(f'favicons|{algorithm}', phash)
|
|
||||||
|
|
||||||
def index_favicons_probabilistic(self, capture_uuid: str, favicons: BytesIO, algorithm: str) -> None:
|
|
||||||
# FIXME: this method isnt used anymore
|
|
||||||
if self.redis.sismember(f'indexed_favicons_probabilistic|{algorithm}', capture_uuid):
|
|
||||||
# Do not reindex
|
|
||||||
return
|
|
||||||
self.redis.sadd(f'indexed_favicons_probabilistic|{algorithm}', capture_uuid)
|
|
||||||
pipeline = self.redis.pipeline()
|
|
||||||
with ZipFile(favicons, 'r') as myzip:
|
|
||||||
for name in myzip.namelist():
|
|
||||||
if not name.endswith('.ico'):
|
|
||||||
continue
|
|
||||||
favicon = myzip.read(name)
|
|
||||||
if not favicon:
|
|
||||||
# Empty file, ignore.
|
|
||||||
continue
|
|
||||||
sha = hashlib.sha512(favicon).hexdigest()
|
|
||||||
if algorithm == 'mmh3-shodan':
|
|
||||||
# Shodan uses a weird technique:
|
|
||||||
# 1. encodes the image to base64, with newlines every 76 characters (as per RFC 2045)
|
|
||||||
# 2. hashes the base64 string with mmh3
|
|
||||||
b64 = base64.encodebytes(favicon)
|
|
||||||
h = str(mmh3.hash(b64))
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(f'Unknown algorithm: {algorithm}')
|
|
||||||
pipeline.zincrby(f'favicons|{algorithm}', 1, h)
|
|
||||||
# All captures with this hash for this algorithm
|
|
||||||
pipeline.sadd(f'favicons|{algorithm}|{h}|captures', capture_uuid)
|
|
||||||
# All hashes with this hash for this algorithm
|
|
||||||
pipeline.sadd(f'favicons|{algorithm}|{h}|favicons', sha)
|
|
||||||
# reverse lookup to get probabilistic hashes related to a specific favicon
|
|
||||||
pipeline.sadd(f'favicons|{algorithm}|{sha}', h)
|
|
||||||
pipeline.execute()
|
|
||||||
|
|
||||||
def get_hashes_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]:
|
|
||||||
'''All the favicon sha512 for this probabilistic hash for this algorithm'''
|
|
||||||
return self.redis.smembers(f'favicons|{algorithm}|{phash}|favicons')
|
|
||||||
|
|
||||||
def get_probabilistic_hashes_favicon(self, algorithm: str, favicon_sha512: str) -> set[str]:
|
|
||||||
'''All the probabilistic hashes for this favicon SHA512 for this algorithm'''''
|
|
||||||
return self.redis.smembers(f'favicons|{algorithm}|{favicon_sha512}')
|
|
||||||
|
|
||||||
def get_captures_favicon_probablistic(self, algorithm: str, phash: str) -> set[str]:
|
|
||||||
'''All the captures with this probabilistic hash for this algorithm'''
|
|
||||||
return self.redis.smembers(f'favicons|{algorithm}|{phash}|captures')
|
|
||||||
|
|
||||||
# ###### Categories ######
|
# ###### Categories ######
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -443,10 +443,8 @@ def get_capture_hash_investigator(hash_type: str, h: str) -> list[tuple[str, str
|
||||||
|
|
||||||
|
|
||||||
def get_favicon_investigator(favicon_sha512: str,
|
def get_favicon_investigator(favicon_sha512: str,
|
||||||
/,
|
/) -> tuple[list[tuple[str, str, str, datetime]],
|
||||||
get_probabilistic: bool=False) -> tuple[list[tuple[str, str, str, datetime]],
|
tuple[str, str, str]]:
|
||||||
tuple[str, str, str],
|
|
||||||
dict[str, dict[str, dict[str, tuple[str, str]]]]]:
|
|
||||||
'''Returns all the captures related to a cookie name entry, used in the web interface.'''
|
'''Returns all the captures related to a cookie name entry, used in the web interface.'''
|
||||||
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)])
|
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)])
|
||||||
captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
|
captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
|
||||||
|
@ -460,30 +458,7 @@ def get_favicon_investigator(favicon_sha512: str,
|
||||||
b64_favicon = ''
|
b64_favicon = ''
|
||||||
mmh3_shodan = ''
|
mmh3_shodan = ''
|
||||||
|
|
||||||
# For now, there is only one probabilistic hash algo for favicons, keeping it simple
|
return captures, (mimetype, b64_favicon, mmh3_shodan)
|
||||||
probabilistic_hash_algos = ['mmh3-shodan']
|
|
||||||
probabilistic_favicons: dict[str, dict[str, dict[str, tuple[str, str]]]] = {}
|
|
||||||
if get_probabilistic:
|
|
||||||
for algo in probabilistic_hash_algos:
|
|
||||||
probabilistic_favicons[algo] = {}
|
|
||||||
for mm3hash in get_indexing(flask_login.current_user).get_probabilistic_hashes_favicon(algo, favicon_sha512):
|
|
||||||
probabilistic_favicons[algo][mm3hash] = {}
|
|
||||||
for sha512 in get_indexing(flask_login.current_user).get_hashes_favicon_probablistic(algo, mm3hash):
|
|
||||||
if sha512 == favicon_sha512:
|
|
||||||
# Skip entry if it is the same as the favicon we are investigating
|
|
||||||
continue
|
|
||||||
favicon = get_indexing(flask_login.current_user).get_favicon(sha512)
|
|
||||||
if favicon:
|
|
||||||
mimetype = from_string(favicon, mime=True)
|
|
||||||
b64_favicon = base64.b64encode(favicon).decode()
|
|
||||||
probabilistic_favicons[algo][mm3hash][sha512] = (mimetype, b64_favicon)
|
|
||||||
if not probabilistic_favicons[algo][mm3hash]:
|
|
||||||
# remove entry if it has no favicon
|
|
||||||
probabilistic_favicons[algo].pop(mm3hash)
|
|
||||||
if not probabilistic_favicons[algo]:
|
|
||||||
# remove entry if it has no hash
|
|
||||||
probabilistic_favicons.pop(algo)
|
|
||||||
return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons
|
|
||||||
|
|
||||||
|
|
||||||
def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
|
def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
|
||||||
|
@ -1787,14 +1762,12 @@ def capture_hash_details(hash_type: str, h: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
@app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
|
@app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
|
||||||
@app.route('/favicon_details/<string:favicon_sha512>/<int:get_probabilistic>', methods=['GET'])
|
def favicon_detail(favicon_sha512: str) -> str:
|
||||||
def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str:
|
captures, favicon = get_favicon_investigator(favicon_sha512.strip())
|
||||||
_get_prob = bool(get_probabilistic)
|
|
||||||
captures, favicon, probabilistic_favicons = get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob)
|
|
||||||
mimetype, b64_favicon, mmh3_shodan = favicon
|
mimetype, b64_favicon, mmh3_shodan = favicon
|
||||||
return render_template('favicon_details.html', favicon_sha512=favicon_sha512,
|
return render_template('favicon_details.html', favicon_sha512=favicon_sha512,
|
||||||
captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, mmh3_shodan=mmh3_shodan,
|
captures=captures, mimetype=mimetype, b64_favicon=b64_favicon,
|
||||||
probabilistic_favicons=probabilistic_favicons)
|
mmh3_shodan=mmh3_shodan)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
||||||
|
|
|
@ -48,18 +48,3 @@
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
{%for probabilistic_hash_algo, entries in probabilistic_favicons.items() %}
|
|
||||||
<h3>Probabilistic Favicon Hashes ({{ probabilistic_hash_algo }})</h3>
|
|
||||||
{% for mm3h, favicons in entries.items() %}
|
|
||||||
<h4>MM3 Hash: {{ mm3h }}</h4>
|
|
||||||
{% for sha512, favicon in favicons.items() %}
|
|
||||||
<a href="#faviconDetailsProbabilisticHashModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=sha512, get_probabilistic=0) }}"
|
|
||||||
data-bs-toggle="modal" data-bs-target="#faviconDetailsProbabilisticHashModal" role="button">
|
|
||||||
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
|
|
||||||
title="Click to see other captures with the same favicon"/>
|
|
||||||
</a>
|
|
||||||
<br>
|
|
||||||
{% endfor %}
|
|
||||||
{% endfor %}
|
|
||||||
{% endfor %}
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ if (downloadFavicons) {
|
||||||
<tr>
|
<tr>
|
||||||
<td>{{ number_captures }}</td>
|
<td>{{ number_captures }}</td>
|
||||||
<td>
|
<td>
|
||||||
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512, get_probabilistic=0) }}"
|
<a href="#faviconDetailsModal" data-remote="{{ url_for('favicon_detail', favicon_sha512=favicon_sha512) }}"
|
||||||
data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button">
|
data-bs-toggle="modal" data-bs-target="#faviconDetailsModal" role="button">
|
||||||
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
|
<img src="data:{{mimetype}};base64,{{ b64_favicon }}" style="width:32px;height:32px;"
|
||||||
title="Click to see other captures with the same favicon"/>
|
title="Click to see other captures with the same favicon"/>
|
||||||
|
|
Loading…
Reference in New Issue