mirror of https://github.com/CIRCL/lookyloo

chg: speedup rendering of very big hostnode popups

parent 17e19a5f27
commit acd4cb8da4
@@ -234,6 +234,7 @@ class Indexing():
         if self.redis.type(f'cookies_names|{cookie_name}|captures') == 'set': # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_cookies', *[entry.split('|')[0] for entry in self.redis.smembers(f'cn|{cookie_name}|captures')])
             self.redis.delete(f'cookies_names|{cookie_name}|captures')
+            return []
         return self.redis.zrevrangebyscore(f'cookies_names|{cookie_name}|captures', max_score, min_score, withscores=True)
@@ -310,6 +311,7 @@ class Indexing():
         if self.redis.type(f'bh|{h}|captures') == 'set': # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_body_hashes', *self.redis.smembers(f'bh|{h}|captures'))
             self.redis.delete(f'bh|{h}|captures')
+            return 0
         return self.redis.zcard(f'body_hashes|{h}|captures')
@@ -408,6 +410,7 @@ class Indexing():
         if self.redis.type(f'hhhashes|{hhh}|captures') == 'set': # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_hhhashes', *self.redis.smembers(f'hhhashes|{hhh}|captures'))
             self.redis.delete(f'hhhashes|{hhh}|captures')
+            return []
         return self.redis.zrevrangebyscore(f'hhhashes|{hhh}|captures', max_score, min_score, withscores=True)
@@ -499,6 +502,7 @@ class Indexing():
         if self.redis.type(f'urls|{md5}|captures') == 'set': # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_urls', *self.redis.smembers(f'urls|{md5}|captures'))
             self.redis.delete(f'urls|{md5}|captures')
+            return []
         return self.redis.zrevrangebyscore(f'urls|{md5}|captures', max_score, min_score, withscores=True)
@@ -507,6 +511,7 @@ class Indexing():
         if self.redis.type(f'urls|{md5}|captures') == 'set': # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_urls', *self.redis.smembers(f'urls|{md5}|captures'))
             self.redis.delete(f'urls|{md5}|captures')
+            return 0
         return self.redis.zcard(f'urls|{md5}|captures')
@@ -523,6 +528,7 @@ class Indexing():
         if self.redis.type(f'hostnames|{hostname}|captures') == 'set': # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_urls', *self.redis.smembers(f'hostnames|{hostname}|captures'))
             self.redis.delete(f'hostnames|{hostname}|captures')
+            return []
         return self.redis.zrevrangebyscore(f'hostnames|{hostname}|captures', max_score, min_score, withscores=True)
@@ -530,6 +536,7 @@ class Indexing():
         if self.redis.type(f'hostnames|{hostname}|captures') == 'set': # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_urls', *self.redis.smembers(f'hostnames|{hostname}|captures'))
             self.redis.delete(f'hostnames|{hostname}|captures')
+            return 0
         return self.redis.zcard(f'hostnames|{hostname}|captures')
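All of the Indexing() hunks above repeat one pattern: when the per-item key is still a legacy Redis set, it is unregistered from its `indexed_*` bookkeeping set and deleted so the background indexer rebuilds it as a sorted set, and the early `return []` / `return 0` (the single added line in each hunk, as reconstructed here) skips the query against the key that was just deleted. A minimal sketch of that pattern, outside the Lookyloo codebase and with hypothetical key names:

```python
from redis import Redis

redis = Redis(decode_responses=True)

def get_captures_demo(item: str) -> list[tuple[str, float]]:
    # Hypothetical key names, for illustration only.
    key = f'demo|{item}|captures'
    if redis.type(key) == 'set':
        # Legacy layout: unregister the item so the background indexer
        # re-creates the key as a sorted set on its next pass.
        if members := redis.smembers(key):
            redis.srem('demo_indexed', *members)
        redis.delete(key)
        # Early return: no point querying the key we just deleted.
        return []
    # Sorted-set layout: members are capture identifiers scored by timestamp.
    return redis.zrevrangebyscore(key, '+Inf', '-Inf', withscores=True)
```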
@@ -512,14 +512,16 @@ def hash_lookup(blob_hash: str, url: str, current_capture_uuid: str) -> tuple[in
     Capture UUID avoids duplicates on the same capture'''
     captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
     cached_captures = lookyloo.sorted_capture_cache(
-        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_body_hash(blob_hash)],
+        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_body_hash(blob_hash,
+                                                                                            oldest_capture=datetime.now() - timedelta(**time_delta_on_index))],
         cached_captures_only=True)
     for cache in cached_captures:
         if cache.uuid == current_capture_uuid:
             continue
-        for urlnode_uuid in get_indexing(flask_login.current_user).get_capture_body_hash_nodes(cache.uuid, blob_hash):
+        urlnodes = get_indexing(flask_login.current_user).get_capture_body_hash_nodes(cache.uuid, blob_hash)
+        for urlnode_uuid in urlnodes:
             try:
-                urlnode = lookyloo.get_urlnode_from_tree(cache.uuid, urlnode_uuid)
+                urlnode = cache.tree.root_hartree.get_url_node_by_uuid(urlnode_uuid)
             except IndexError:
                 continue
             if url == urlnode.name:
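In hash_lookup() the index query is now bounded in time (`oldest_capture=datetime.now() - timedelta(**time_delta_on_index)`), and URL nodes are resolved from the capture cache already in hand (`cache.tree.root_hartree.get_url_node_by_uuid`) rather than through `lookyloo.get_urlnode_from_tree`. A rough sketch of how such a time bound translates into a sorted-set range query, assuming capture timestamps are used as scores; `time_delta_on_index`, the key name and the function below are illustrative stand-ins:

```python
from datetime import datetime, timedelta

from redis import Redis

redis = Redis(decode_responses=True)

# Illustrative stand-in for the time_delta_on_index setting.
time_delta_on_index = {'weeks': 1}

def captures_for_hash(body_hash: str, oldest_capture: datetime | None = None) -> list[tuple[str, float]]:
    # A time window becomes a score range on the sorted set.
    min_score: float | str = oldest_capture.timestamp() if oldest_capture else '-Inf'
    return redis.zrevrangebyscore(f'demo_bh|{body_hash}|captures', '+Inf', min_score, withscores=True)

# Only captures newer than the configured window are considered for the popup.
recent = captures_for_hash('deadbeef', oldest_capture=datetime.now() - timedelta(**time_delta_on_index))
```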
@@ -578,8 +580,9 @@ def get_hostnode_investigator(capture_uuid: str, /, node_uuid: str) -> tuple[Hos
         # Index lookup
         # %%% Full body %%%
         if freq := get_indexing(flask_login.current_user).get_captures_body_hash_count(url.body_hash):
-            to_append['body_hash_details'] = {'hash_freq': freq}
-            to_append['body_hash_details']['other_captures'] = hash_lookup(url.body_hash, url.name, capture_uuid)
+            to_append['body_hash_details'] = {'hash_freq': freq, 'other_captures': (freq, {'same_url': [], 'different_url': []})}
+            if freq > 1:
+                to_append['body_hash_details']['other_captures'] = hash_lookup(url.body_hash, url.name, capture_uuid)

         # %%% Embedded ressources %%%
         if hasattr(url, 'embedded_ressources') and url.embedded_ressources:
@@ -589,10 +592,13 @@
                 if h in to_append['embedded_ressources']:
                     # Skip duplicates
                     continue
-                to_append['embedded_ressources'][h] = {'body_size': blob.getbuffer().nbytes, 'type': mimetype}
+                to_append['embedded_ressources'][h] = {'body_size': blob.getbuffer().nbytes,
+                                                       'type': mimetype}
                 if freq := get_indexing(flask_login.current_user).get_captures_body_hash_count(h):
                     to_append['embedded_ressources'][h]['hash_freq'] = freq
-                    to_append['embedded_ressources'][h]['other_captures'] = hash_lookup(h, url.name, capture_uuid)
+                    to_append['embedded_ressources'][h]['other_captures'] = (freq, {'same_url': [], 'different_url': []})
+                    if freq > 1:
+                        to_append['embedded_ressources'][h]['other_captures'] = hash_lookup(h, url.name, capture_uuid)
             for h in to_append['embedded_ressources'].keys():
                 known, legitimate = normalize_known_content(h, known_content, url)
                 if known:
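Both get_hostnode_investigator() hunks defer the expensive hash_lookup(): `other_captures` is pre-filled with an empty result tuple, and the real cross-capture lookup only runs when the hash was seen in more than one capture (`freq > 1`). For a hostnode with many resources that are unique to the capture, this skips most of the lookups, which lines up with the commit's goal of speeding up very big hostnode popups. A stripped-down sketch of that guard (names are illustrative, not Lookyloo's API):

```python
from typing import Any

def expensive_hash_lookup(resource_hash: str) -> tuple[int, dict[str, list[Any]]]:
    # Placeholder for the real cross-capture lookup (hypothetical).
    return 1, {'same_url': [], 'different_url': []}

def build_details(freq: int, resource_hash: str) -> dict[str, Any]:
    # Cheap default: consumers can always unpack (total, {'same_url': [...], 'different_url': [...]}).
    details: dict[str, Any] = {'hash_freq': freq,
                               'other_captures': (freq, {'same_url': [], 'different_url': []})}
    if freq > 1:
        # Only pay for the lookup when the resource was actually seen elsewhere.
        details['other_captures'] = expensive_hash_lookup(resource_hash)
    return details
```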
@@ -192,7 +192,7 @@
   {% set total_captures = details[0] %}
   {% set other_captures = details[1] %}
   {# Only show details if the hits are in an other capture #}
-  {% if total_captures > 1 %}
+  {% if total_captures > 1 %}
   <p>
     The same file was seen in <b>{{ total_captures - 1 }}</b> other captures.
     </br>
@@ -206,7 +206,7 @@
       {# Lists of other captures loading the same content... #}
       <div class="collapse" id="captureslist_{{ identifier_for_toggle }}">
         <div class="card card-body">
-          Note that only the most recent cached captures are displayed here.
+          Note that only the most recent cached captures are displayed here, click on the link below to see more.
           {% if other_captures['different_url']|length > 0 %}
             {# ... on other URLs #}
             <div>