From b8cd1319a56f5fd84604f9cc1cbccec45197e436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 27 Jul 2022 14:36:56 +0200 Subject: [PATCH] fix: Speedup display of hostnode popup --- lookyloo/capturecache.py | 6 +++++- lookyloo/indexing.py | 11 ++++++++++- lookyloo/lookyloo.py | 2 +- website/web/__init__.py | 2 +- website/web/templates/hostname_popup.html | 2 +- website/web/templates/macros.html | 4 +--- 6 files changed, 19 insertions(+), 8 deletions(-) diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index b59cecfd..290329e5 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -10,7 +10,7 @@ from collections.abc import Mapping from datetime import datetime from functools import lru_cache from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union, Set import dns.rdatatype import dns.resolver @@ -97,6 +97,10 @@ class CapturesIndex(Mapping): self.__cache: Dict[str, CaptureCache] = {} self._quick_init() + @property + def cached_captures(self) -> Set[str]: + return set(self.__cache.keys()) + def __getitem__(self, uuid: str) -> CaptureCache: if uuid in self.__cache: if (self.__cache[uuid].capture_dir.exists() diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py index 9a01e362..dd6c74ed 100644 --- a/lookyloo/indexing.py +++ b/lookyloo/indexing.py @@ -177,7 +177,14 @@ class Indexing(): def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None, filter_capture_uuid: Optional[str]=None, - limit: int=20) -> Tuple[int, List[Tuple[str, str, str, bool]]]: + limit: int=20, + prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]: + '''Get the captures matching the hash. 
+ :param filter_url: URL of the hash we're searching for + :param filter_capture_uuid: UUID of the capture the hash was found in + :param limit: Max matching captures to return + :param prefered_uuids: UUIDs of the captures cached right now; when non-empty, only those captures are considered, so we don't rebuild trees. + ''' to_return: List[Tuple[str, str, str, bool]] = [] all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures') len_captures = len(all_captures) @@ -186,6 +193,8 @@ class Indexing(): # Used to skip hits in current capture len_captures -= 1 continue + if prefered_uuids and capture_uuid not in prefered_uuids: + continue for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1): url_uuid, hostnode_uuid, url = entry.split('|', 2) hostname: str = urlsplit(url).hostname diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 76ee33be..56eee85c 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -633,7 +633,7 @@ class Lookyloo(): If a URL is given, it splits the results if the hash is seen on the same URL or an other one. Capture UUID avoids duplicates on the same capture''' captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []} - total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid) + total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures) for h_capture_uuid, url_uuid, url_hostname, same_url in details: cache = self.capture_cache(h_capture_uuid) if cache: diff --git a/website/web/__init__.py b/website/web/__init__.py index 73ce11d4..9a3dc34c 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -605,7 +605,7 @@ def send_mail(tree_uuid: str): @app.route('/tree//', methods=['GET']) def tree(tree_uuid: str, node_uuid: Optional[str]=None): if tree_uuid == 'False': - flash("Unable to process your request. 
The domain may not exist, or splash isn't started", 'warning') + flash("Unable to process your request.", 'warning') return redirect(url_for('index')) cache = lookyloo.capture_cache(tree_uuid) if not cache: diff --git a/website/web/templates/hostname_popup.html b/website/web/templates/hostname_popup.html index 866d2eda..53956042 100644 --- a/website/web/templates/hostname_popup.html +++ b/website/web/templates/hostname_popup.html @@ -210,7 +210,7 @@ across all the captures on this lookyloo instance, in {{ url['body_hash_details']['hash_domains_freq'] }} unique domains.
- {# other captures related wit the same dontent #} + {# other captures related with the same content #} {% if 'other_captures' in url['body_hash_details'] %} {{ indexed_hash(url['body_hash_details']['other_captures'], url['url_object'].uuid) }} {% endif %} diff --git a/website/web/templates/macros.html b/website/web/templates/macros.html index 2a6e5e7f..115ec6bf 100644 --- a/website/web/templates/macros.html +++ b/website/web/templates/macros.html @@ -185,9 +185,7 @@ {# Lists of other captures loading the same content... #}
- {% if total_captures > 20 %} - Note that only 20 captures are displayed here. - {% endif %} + Note that only the most recent captures are displayed here. {% if other_captures['different_url']|length > 0 %} {# ... on other URLs #}