mirror of https://github.com/CIRCL/lookyloo
chg: Normalize known content lookup
parent
4d6237aae5
commit
1b0cdde84e
|
@ -843,13 +843,6 @@ class Lookyloo():
|
||||||
captures_list['different_url'].append((h_capture_uuid, url_uuid, cache['title'], cache['timestamp'], url_hostname))
|
captures_list['different_url'].append((h_capture_uuid, url_uuid, cache['title'], cache['timestamp'], url_hostname))
|
||||||
return captures_list
|
return captures_list
|
||||||
|
|
||||||
def _format_sane_js_response(self, lookup_table: Dict, h: str) -> Optional[Union[str, Tuple]]:
|
|
||||||
if lookup_table.get(h):
|
|
||||||
libname, version, path = lookup_table[h][0].split("|")
|
|
||||||
other_files = len(lookup_table[h])
|
|
||||||
return libname, version, path, other_files
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
||||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||||
if not capture_dir:
|
if not capture_dir:
|
||||||
|
@ -862,13 +855,21 @@ class Lookyloo():
|
||||||
if not hostnode:
|
if not hostnode:
|
||||||
raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
|
raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
|
||||||
|
|
||||||
# search in locally defined known content
|
# Gather all the resources in the hostnode.
|
||||||
# 1. get from cache all descriptions related to a body hash
|
all_ressources_hashes = set()
|
||||||
to_lookup = [url.body_hash for url in hostnode.urls if hasattr(url, 'body_hash')]
|
for url in hostnode.urls:
|
||||||
known_content_table = dict(zip(to_lookup, self.redis.hmget('known_content', to_lookup)))
|
if hasattr(url, 'body_hash'):
|
||||||
|
all_ressources_hashes.add(url.body_hash)
|
||||||
|
if hasattr(url, 'embedded_ressources') and url.embedded_ressources:
|
||||||
|
for mimetype, blobs in url.embedded_ressources.items():
|
||||||
|
all_ressources_hashes.update([h for h, b in blobs])
|
||||||
|
|
||||||
|
# Get all descriptions related to the resources from the local cache of known content.
|
||||||
|
known_content_table = dict(zip(all_ressources_hashes,
|
||||||
|
self.redis.hmget('known_content', all_ressources_hashes)))
|
||||||
|
|
||||||
# 2. query sanejs if enabled
|
|
||||||
if hasattr(self, 'sanejs') and self.sanejs.available:
|
if hasattr(self, 'sanejs') and self.sanejs.available:
|
||||||
|
# Query sanejs on the remaining ones
|
||||||
to_lookup = [h for h, description in known_content_table.items() if not description]
|
to_lookup = [h for h, description in known_content_table.items() if not description]
|
||||||
for h, entry in self.sanejs.hashes_lookup(to_lookup).items():
|
for h, entry in self.sanejs.hashes_lookup(to_lookup).items():
|
||||||
libname, version, path = entry[0].split("|")
|
libname, version, path = entry[0].split("|")
|
||||||
|
@ -908,17 +909,12 @@ class Lookyloo():
|
||||||
to_append['embedded_ressources'][h]['type'] = mimetype
|
to_append['embedded_ressources'][h]['type'] = mimetype
|
||||||
if freq_embedded['hash_freq'] > 1:
|
if freq_embedded['hash_freq'] > 1:
|
||||||
to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid)
|
to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid)
|
||||||
if hasattr(self, 'sanejs') and self.sanejs.available:
|
for h in to_append['embedded_ressources'].keys():
|
||||||
to_lookup = list(to_append['embedded_ressources'].keys())
|
if h in known_content_table:
|
||||||
sanejs_lookups_embedded = self.sanejs.hashes_lookup(to_lookup)
|
to_append['embedded_ressources'][h]['known_content'] = known_content_table[h]
|
||||||
for h in to_append['embedded_ressources'].keys():
|
|
||||||
sane_js_match = self._format_sane_js_response(sanejs_lookups_embedded, h)
|
|
||||||
if sane_js_match:
|
|
||||||
to_append['embedded_ressources'][h]['sane_js'] = sane_js_match
|
|
||||||
|
|
||||||
# Optional: SaneJS information
|
|
||||||
if url.body_hash in known_content_table:
|
if url.body_hash in known_content_table:
|
||||||
to_append['sane_js'] = known_content_table[url.body_hash]
|
to_append['known_content'] = known_content_table[url.body_hash]
|
||||||
|
|
||||||
# Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
|
# Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
|
||||||
if hasattr(url, 'cookies_sent'):
|
if hasattr(url, 'cookies_sent'):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
{% extends "main.html" %}
|
{% extends "main.html" %}
|
||||||
{% from "macros.html" import sanejs_details %}
|
{% from "macros.html" import known_content_details %}
|
||||||
{% from "macros.html" import indexed_hash %}
|
{% from "macros.html" import indexed_hash %}
|
||||||
{% from "macros.html" import indexed_cookies %}
|
{% from "macros.html" import indexed_cookies %}
|
||||||
{% from "macros.html" import popup_icons %}
|
{% from "macros.html" import popup_icons %}
|
||||||
|
@ -150,9 +150,8 @@
|
||||||
{%endif%}
|
{%endif%}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% if url['sane_js'] %}
|
{% if url['known_content'] %}
|
||||||
{# Result from SaneJS for the response #}
|
{{ known_content_details(url['known_content']) }}
|
||||||
{{ sanejs_details(url['sane_js']) }}
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{# Everything we know about the response content #}
|
{# Everything we know about the response content #}
|
||||||
|
@ -225,9 +224,8 @@
|
||||||
<div class="collapse" id="embedded_full_list_{{ url['url_object'].uuid }}">
|
<div class="collapse" id="embedded_full_list_{{ url['url_object'].uuid }}">
|
||||||
<div class="card card-body">
|
<div class="card card-body">
|
||||||
{% for hash, details in url['embedded_ressources'].items() %}
|
{% for hash, details in url['embedded_ressources'].items() %}
|
||||||
{% if details['sane_js'] %}
|
{% if details['known_content'] %}
|
||||||
{# Result from SaneJS for the embedded resources #}
|
{{ known_content_details(details['known_content']) }}
|
||||||
{{ sanejs_details(details['sane_js']) }}
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div>
|
<div>
|
||||||
This file (<b>{{ details['type'] }}</b>) can be found <b>{{ details['hash_freq'] }}</b> times
|
This file (<b>{{ details['type'] }}</b>) can be found <b>{{ details['hash_freq'] }}</b> times
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
{% macro sanejs_details(details) %}
|
{% macro known_content_details(details) %}
|
||||||
<div>
|
<div>
|
||||||
{% if details is string %}
|
{% if details is string %}
|
||||||
<b>{{ details }} </b>
|
<b>{{ details }} </b>
|
||||||
|
|
Loading…
Reference in New Issue