chg: Speed up hostnode popup

pull/828/head
Raphaël Vinot 2023-11-06 16:16:09 +01:00
parent b06ae9de65
commit 77d5cab1e8
2 changed files with 7 additions and 4 deletions

View File

@ -193,21 +193,22 @@ class Indexing():
limit: int=20, limit: int=20,
prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]: prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
'''Get the captures matching the hash. '''Get the captures matching the hash.
:param filter_url: URL of the hash we're searching for :param filter_url: URL of the hash we're searching for
:param filter_capture_uuid: UUID of the capture the hash was found in :param filter_capture_uuid: UUID of the capture the hash was found in
:param limit: Max matching captures to return :param limit: Max matching captures to return
:param prefered_uuids: UUID cached right now, so we don't rebuild trees. :param prefered_uuids: UUID cached right now, so we don't rebuild trees.
''' '''
to_return: List[Tuple[str, str, str, bool]] = [] to_return: List[Tuple[str, str, str, bool]] = []
all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures') len_captures = self.redis.scard(f'bh|{body_hash}|captures')
len_captures = len(all_captures) for capture_uuid in self.redis.sscan_iter(f'bh|{body_hash}|captures'):
for capture_uuid in list(all_captures)[:limit]:
if capture_uuid == filter_capture_uuid: if capture_uuid == filter_capture_uuid:
# Used to skip hits in current capture # Used to skip hits in current capture
len_captures -= 1 len_captures -= 1
continue continue
if prefered_uuids and capture_uuid not in prefered_uuids: if prefered_uuids and capture_uuid not in prefered_uuids:
continue continue
limit -= 1
for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1): for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
url_uuid, hostnode_uuid, url = entry.split('|', 2) url_uuid, hostnode_uuid, url = entry.split('|', 2)
hostname: str = urlsplit(url).hostname hostname: str = urlsplit(url).hostname
@ -215,6 +216,8 @@ class Indexing():
to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url)) to_return.append((capture_uuid, hostnode_uuid, hostname, url == filter_url))
else: else:
to_return.append((capture_uuid, hostnode_uuid, hostname, False)) to_return.append((capture_uuid, hostnode_uuid, hostname, False))
if limit <= 0:
break
return len_captures, to_return return len_captures, to_return
def get_body_hash_domains(self, body_hash: str) -> List[Tuple[str, float]]: def get_body_hash_domains(self, body_hash: str) -> List[Tuple[str, float]]:

View File

@ -185,7 +185,7 @@
{# Lists of other captures loading the same content... #} {# Lists of other captures loading the same content... #}
<div class="collapse" id="captureslist_{{ identifier_for_toggle }}"> <div class="collapse" id="captureslist_{{ identifier_for_toggle }}">
<div class="card card-body"> <div class="card card-body">
Note that only the most recent 20 captures are displayed here. Note that only the most recent cached captures are displayed here.
{% if other_captures['different_url']|length > 0 %} {% if other_captures['different_url']|length > 0 %}
{# ... on other URLs #} {# ... on other URLs #}
<div> <div>