mirror of https://github.com/CIRCL/lookyloo
new: Add favicons in indexer
parent
a795d08456
commit
4153138644
|
@ -136,6 +136,7 @@ class BackgroundIndexer(AbstractManager):
|
|||
p.sismember('indexed_body_hashes', cache.uuid)
|
||||
p.sismember('indexed_cookies', cache.uuid)
|
||||
p.sismember('indexed_hhhashes', cache.uuid)
|
||||
p.sismember('indexed_favicons', cache.uuid)
|
||||
indexed = p.execute()
|
||||
if all(indexed):
|
||||
continue
|
||||
|
@ -158,6 +159,10 @@ class BackgroundIndexer(AbstractManager):
|
|||
if not indexed[3]:
|
||||
self.logger.info(f'Indexing HH Hashes for {cache.uuid}')
|
||||
self.lookyloo.indexing.index_http_headers_hashes_capture(ct)
|
||||
if not indexed[4]:
|
||||
self.logger.info(f'Indexing favicons for {cache.uuid}')
|
||||
favicons = self.lookyloo.get_potential_favicons(cache.uuid, all_favicons=True, for_datauri=False)
|
||||
self.lookyloo.indexing.index_favicons_capture(cache.uuid, favicons)
|
||||
# NOTE: categories aren't taken into account here, should be fixed(?)
|
||||
# see indexing.index_categories_capture(capture_uuid, categories)
|
||||
index_redis.delete('ongoing_indexing')
|
||||
|
|
|
@ -5,9 +5,11 @@ from __future__ import annotations
|
|||
import hashlib
|
||||
import logging
|
||||
# import re
|
||||
from io import BytesIO
|
||||
from collections import defaultdict
|
||||
from typing import Iterable
|
||||
from urllib.parse import urlsplit
|
||||
from zipfile import ZipFile
|
||||
|
||||
from har2tree import CrawledTree
|
||||
from redis import ConnectionPool, Redis
|
||||
|
@ -22,12 +24,18 @@ class Indexing():
|
|||
def __init__(self) -> None:
    '''Set up the logger and the two connection pools for the indexing redis socket.

    Both pools connect through the path returned by get_socket_path('indexing');
    they only differ in the decode_responses setting.
    '''
    self.logger = logging.getLogger(f'{self.__class__.__name__}')
    self.logger.setLevel(get_config('generic', 'loglevel'))

    # Pool without decode_responses: used by the redis_bytes property.
    self.redis_pool_bytes: ConnectionPool = ConnectionPool(
        connection_class=UnixDomainSocketConnection,
        path=get_socket_path('indexing'),
    )

    # Pool with decode_responses enabled: used by the redis property.
    self.redis_pool: ConnectionPool = ConnectionPool(
        connection_class=UnixDomainSocketConnection,
        path=get_socket_path('indexing'),
        decode_responses=True,
    )
|
||||
|
||||
def clear_indexes(self) -> None:
    '''Flush the database the indexing redis connection points to.'''
    connection = self.redis
    connection.flushdb()
|
||||
|
||||
@property
def redis_bytes(self) -> Redis:  # type: ignore[type-arg]
    '''Redis client built on self.redis_pool_bytes (the pool created without decode_responses).'''
    pool = self.redis_pool_bytes
    return Redis(connection_pool=pool)
|
||||
|
||||
@property
def redis(self) -> Redis:  # type: ignore[type-arg]
    '''Redis client built on self.redis_pool (the pool created with decode_responses=True).'''
    pool = self.redis_pool
    return Redis(connection_pool=pool)
|
||||
|
@ -325,6 +333,42 @@ class Indexing():
|
|||
def get_captures_hostname(self, hostname: str) -> set[str]:
    '''Return the members of the redis set hostnames|<hostname>|captures.'''
    captures_key = f'hostnames|{hostname}|captures'
    return self.redis.smembers(captures_key)
|
||||
|
||||
# ###### favicons ######
|
||||
|
||||
@property
def favicons(self) -> list[tuple[str, float]]:
    '''Entries of the 'favicons' sorted set with their scores, in reverse
    score order, limited to ranks 0 through 200 (both bounds included).'''
    first_rank, last_rank = 0, 200
    return self.redis.zrevrange('favicons', first_rank, last_rank, withscores=True)
|
||||
|
||||
def favicon_number_captures(self, favicon_sha512: str) -> int:
    '''Return the cardinality of the redis set favicons|<favicon_sha512>|captures.'''
    captures_key = f'favicons|{favicon_sha512}|captures'
    return self.redis.scard(captures_key)
|
||||
|
||||
def index_favicons_capture(self, capture_uuid: str, favicons: BytesIO) -> None:
    '''Index the favicons of a capture.

    favicons is a zip archive (file-like object). Every non-empty member
    whose name ends with '.ico' is hashed with SHA512; for each distinct hash
    the 'favicons' sorted set score is incremented by one, the capture UUID is
    added to favicons|<sha512>|captures, and the raw icon is stored under
    favicons|<sha512>.

    A capture already present in the 'indexed_favicons' set is skipped.
    '''
    if self.redis.sismember('indexed_favicons', capture_uuid):
        # Do not reindex
        return
    # NOTE(review): the capture is flagged as indexed before the archive is
    # read, so an archive that fails to open will not be retried - confirm
    # that this is intended.
    self.redis.sadd('indexed_favicons', capture_uuid)

    pipeline = self.redis.pipeline()
    # The same icon can be present several times in the archive under
    # different names: count it only once per capture.
    already_indexed: set[str] = set()
    with ZipFile(favicons, 'r') as myzip:
        for name in myzip.namelist():
            if not name.endswith('.ico'):
                continue
            favicon = myzip.read(name)
            if not favicon:
                # Empty file, ignore.
                continue
            sha = hashlib.sha512(favicon).hexdigest()
            if sha in already_indexed:
                continue
            already_indexed.add(sha)
            pipeline.zincrby('favicons', 1, sha)
            pipeline.sadd(f'favicons|{sha}|captures', capture_uuid)
            # There is no easy access to the favicons unless we store them in redis
            pipeline.set(f'favicons|{sha}', favicon)
    pipeline.execute()
|
||||
|
||||
def get_captures_favicon(self, favicon_sha512: str) -> set[str]:
    '''Return the members of the redis set favicons|<favicon_sha512>|captures.'''
    captures_key = f'favicons|{favicon_sha512}|captures'
    return self.redis.smembers(captures_key)
|
||||
|
||||
def get_favicon(self, favicon_sha512: str) -> bytes | None:
    '''Return the value stored under favicons|<favicon_sha512>, read through
    the redis_bytes connection, or whatever that connection returns when the
    key is missing.'''
    favicon_key = f'favicons|{favicon_sha512}'
    return self.redis_bytes.get(favicon_key)
|
||||
|
||||
# ###### Categories ######
|
||||
|
||||
@property
|
||||
|
|
|
@ -1046,6 +1046,13 @@ class Lookyloo():
|
|||
for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
|
||||
return captures, domains
|
||||
|
||||
def get_favicon_investigator(self, favicon_sha512: str, /) -> tuple[list[tuple[str, str]], bytes | None]:
    '''Returns all the captures related to a favicon (identified by its SHA512)
    along with the favicon itself, used in the web interface.

    The first element is a list of (capture UUID, capture title) tuples, in
    the order given by sorted_capture_cache. The second element is whatever
    indexing.get_favicon returns for that hash (None when nothing is stored).
    '''
    capture_uuids = list(self.indexing.get_captures_favicon(favicon_sha512))
    cached_captures = self.sorted_capture_cache(capture_uuids)
    captures = [(cache.uuid, cache.title) for cache in cached_captures]
    favicon = self.indexing.get_favicon(favicon_sha512)
    return captures, favicon
|
||||
|
||||
def get_hhh_investigator(self, hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
|
||||
'''Returns all the captures related to a cookie name entry, used in the web interface.'''
|
||||
all_captures = dict(self.indexing.get_http_headers_hashes_captures(hhh))
|
||||
|
|
|
@ -945,6 +945,19 @@ def hhhashes_lookup() -> str:
|
|||
return render_template('hhhashes.html', hhhashes=hhhashes)
|
||||
|
||||
|
||||
@app.route('/favicons', methods=['GET'])
def favicons_lookup() -> str:
    '''Render the favicons page.

    For every (sha512, score) entry exposed by the indexing favicons property,
    the template receives a (sha512, score, number of captures,
    base64-encoded icon) tuple. Entries without a stored icon are left out.
    '''
    rows = []
    for digest, frequency in lookyloo.indexing.favicons:
        raw_icon = lookyloo.indexing.get_favicon(digest)
        if raw_icon:
            encoded_icon = base64.b64encode(raw_icon).decode()
            capture_count = lookyloo.indexing.favicon_number_captures(digest)
            rows.append((digest, frequency, capture_count, encoded_icon))
    return render_template('favicons.html', favicons=rows)
|
||||
|
||||
|
||||
@app.route('/ressources', methods=['GET'])
|
||||
def ressources() -> str:
|
||||
ressources = []
|
||||
|
@ -1206,6 +1219,17 @@ def hhh_detail(hhh: str) -> str:
|
|||
return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers)
|
||||
|
||||
|
||||
@app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
def favicon_detail(favicon_sha512: str) -> str:
    '''Render the details page of a single favicon.

    The lookup uses the stripped hash; the template receives the hash as it
    came in, the related captures, and the base64-encoded icon.
    '''
    captures, favicon = lookyloo.get_favicon_investigator(favicon_sha512.strip())
    # No icon bytes came back for this hash: hand an empty string to the template.
    b64_favicon = base64.b64encode(favicon).decode() if favicon else ''
    return render_template(
        'favicon_details.html',
        favicon_sha512=favicon_sha512,
        captures=captures,
        b64_favicon=b64_favicon,
    )
|
||||
|
||||
|
||||
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
||||
def body_hash_details(body_hash: str) -> str:
|
||||
from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
|
||||
|
|
Loading…
Reference in New Issue