fix: Speed up display of hostnode popup

pull/474/head
Raphaël Vinot 2022-07-27 14:36:56 +02:00
parent bcfaaec941
commit b8cd1319a5
6 changed files with 19 additions and 8 deletions

View File

@ -10,7 +10,7 @@ from collections.abc import Mapping
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union, Set
import dns.rdatatype
import dns.resolver
@ -97,6 +97,10 @@ class CapturesIndex(Mapping):
self.__cache: Dict[str, CaptureCache] = {}
self._quick_init()
@property
def cached_captures(self) -> Set[str]:
    '''UUIDs of the captures currently present in the in-memory cache.

    A new set is built on every access, so later changes to the cache
    are not reflected in a set that was returned earlier.
    '''
    # Iterating a dict yields its keys, i.e. the capture UUIDs.
    uuids: Set[str] = set(self.__cache)
    return uuids
def __getitem__(self, uuid: str) -> CaptureCache:
if uuid in self.__cache:
if (self.__cache[uuid].capture_dir.exists()

View File

@ -177,7 +177,14 @@ class Indexing():
def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
filter_capture_uuid: Optional[str]=None,
limit: int=20) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
limit: int=20,
prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
'''Get the captures matching the hash.
:param filter_url: URL of the hash we're searching for
:param filter_capture_uuid: UUID of the capture the hash was found in
:param limit: Max matching captures to return
:param prefered_uuids: UUIDs of the captures cached right now. When non-empty, only these captures are considered, so we don't rebuild trees.
'''
to_return: List[Tuple[str, str, str, bool]] = []
all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
len_captures = len(all_captures)
@ -186,6 +193,8 @@ class Indexing():
# Used to skip hits in current capture
len_captures -= 1
continue
if prefered_uuids and capture_uuid not in prefered_uuids:
continue
for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
url_uuid, hostnode_uuid, url = entry.split('|', 2)
hostname: str = urlsplit(url).hostname

View File

@ -633,7 +633,7 @@ class Lookyloo():
If a URL is given, the results are split according to whether the hash was seen on the same URL or on another one.
Capture UUID avoids duplicates on the same capture'''
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid)
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures)
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
cache = self.capture_cache(h_capture_uuid)
if cache:

View File

@ -605,7 +605,7 @@ def send_mail(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET'])
def tree(tree_uuid: str, node_uuid: Optional[str]=None):
if tree_uuid == 'False':
flash("Unable to process your request. The domain may not exist, or splash isn't started", 'warning')
flash("Unable to process your request.", 'warning')
return redirect(url_for('index'))
cache = lookyloo.capture_cache(tree_uuid)
if not cache:

View File

@ -210,7 +210,7 @@
across all the captures on this lookyloo instance, in <b>{{ url['body_hash_details']['hash_domains_freq'] }}</b> unique domains.
</br>
{# other captures related wit the same dontent #}
{# other captures related with the same content #}
{% if 'other_captures' in url['body_hash_details'] %}
{{ indexed_hash(url['body_hash_details']['other_captures'], url['url_object'].uuid) }}
{% endif %}

View File

@ -185,9 +185,7 @@
{# Lists of other captures loading the same content... #}
<div class="collapse" id="captureslist_{{ identifier_for_toggle }}">
<div class="card card-body">
{% if total_captures > 20 %}
Note that only 20 captures are displayed here.
{% endif %}
Note that only the most recent captures are displayed here.
{% if other_captures['different_url']|length > 0 %}
{# ... on other URLs #}
<div>