fix: Speed up display of hostnode popup

pull/474/head
Raphaël Vinot 2022-07-27 14:36:56 +02:00
parent bcfaaec941
commit b8cd1319a5
6 changed files with 19 additions and 8 deletions

View File

@ -10,7 +10,7 @@ from collections.abc import Mapping
from datetime import datetime from datetime import datetime
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union from typing import Any, Dict, List, Optional, Tuple, Union, Set
import dns.rdatatype import dns.rdatatype
import dns.resolver import dns.resolver
@ -97,6 +97,10 @@ class CapturesIndex(Mapping):
self.__cache: Dict[str, CaptureCache] = {} self.__cache: Dict[str, CaptureCache] = {}
self._quick_init() self._quick_init()
@property
def cached_captures(self) -> Set[str]:
    '''UUIDs of the captures currently present in the cache mapping.

    A new set is built on every access, so the caller can modify the
    result without touching the cache itself.
    '''
    # Iterating a dict yields its keys, which are the capture UUIDs.
    return set(self.__cache)
def __getitem__(self, uuid: str) -> CaptureCache: def __getitem__(self, uuid: str) -> CaptureCache:
if uuid in self.__cache: if uuid in self.__cache:
if (self.__cache[uuid].capture_dir.exists() if (self.__cache[uuid].capture_dir.exists()

View File

@ -177,7 +177,14 @@ class Indexing():
def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None, def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
filter_capture_uuid: Optional[str]=None, filter_capture_uuid: Optional[str]=None,
limit: int=20) -> Tuple[int, List[Tuple[str, str, str, bool]]]: limit: int=20,
prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
'''Get the captures matching the hash.
:param filter_url: URL of the hash we're searching for
:param filter_capture_uuid: UUID of the capture the hash was found in
:param limit: Max matching captures to return
:param prefered_uuids: UUID cached right now, so we don't rebuild trees.
'''
to_return: List[Tuple[str, str, str, bool]] = [] to_return: List[Tuple[str, str, str, bool]] = []
all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures') all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
len_captures = len(all_captures) len_captures = len(all_captures)
@ -186,6 +193,8 @@ class Indexing():
# Used to skip hits in current capture # Used to skip hits in current capture
len_captures -= 1 len_captures -= 1
continue continue
if prefered_uuids and capture_uuid not in prefered_uuids:
continue
for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1): for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
url_uuid, hostnode_uuid, url = entry.split('|', 2) url_uuid, hostnode_uuid, url = entry.split('|', 2)
hostname: str = urlsplit(url).hostname hostname: str = urlsplit(url).hostname

View File

@ -633,7 +633,7 @@ class Lookyloo():
If a URL is given, it splits the results if the hash is seen on the same URL or an other one. If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
Capture UUID avoids duplicates on the same capture''' Capture UUID avoids duplicates on the same capture'''
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []} captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid) total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures)
for h_capture_uuid, url_uuid, url_hostname, same_url in details: for h_capture_uuid, url_uuid, url_hostname, same_url in details:
cache = self.capture_cache(h_capture_uuid) cache = self.capture_cache(h_capture_uuid)
if cache: if cache:

View File

@ -605,7 +605,7 @@ def send_mail(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET']) @app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET'])
def tree(tree_uuid: str, node_uuid: Optional[str]=None): def tree(tree_uuid: str, node_uuid: Optional[str]=None):
if tree_uuid == 'False': if tree_uuid == 'False':
flash("Unable to process your request. The domain may not exist, or splash isn't started", 'warning') flash("Unable to process your request.", 'warning')
return redirect(url_for('index')) return redirect(url_for('index'))
cache = lookyloo.capture_cache(tree_uuid) cache = lookyloo.capture_cache(tree_uuid)
if not cache: if not cache:

View File

@ -210,7 +210,7 @@
across all the captures on this lookyloo instance, in <b>{{ url['body_hash_details']['hash_domains_freq'] }}</b> unique domains. across all the captures on this lookyloo instance, in <b>{{ url['body_hash_details']['hash_domains_freq'] }}</b> unique domains.
</br> </br>
{# other captures related wit the same dontent #} {# other captures related with the same content #}
{% if 'other_captures' in url['body_hash_details'] %} {% if 'other_captures' in url['body_hash_details'] %}
{{ indexed_hash(url['body_hash_details']['other_captures'], url['url_object'].uuid) }} {{ indexed_hash(url['body_hash_details']['other_captures'], url['url_object'].uuid) }}
{% endif %} {% endif %}

View File

@ -185,9 +185,7 @@
{# Lists of other captures loading the same content... #} {# Lists of other captures loading the same content... #}
<div class="collapse" id="captureslist_{{ identifier_for_toggle }}"> <div class="collapse" id="captureslist_{{ identifier_for_toggle }}">
<div class="card card-body"> <div class="card card-body">
{% if total_captures > 20 %} Note that only the most recent captures are displayed here.
Note that only 20 captures are displayed here.
{% endif %}
{% if other_captures['different_url']|length > 0 %} {% if other_captures['different_url']|length > 0 %}
{# ... on other URLs #} {# ... on other URLs #}
<div> <div>