new: Add visualisation for embedded resources.

pull/78/head
Raphaël Vinot 2020-07-10 18:57:16 +02:00
parent dab2c53269
commit e5e4e4972e
2 changed files with 138 additions and 24 deletions

View File

@ -121,6 +121,14 @@ class Indexing():
pipeline.sadd(f'bh|{urlnode.body_hash}|captures', crawled_tree.uuid)
# ZSet of all urlnode_UUIDs|full_url
pipeline.zincrby(f'bh|{urlnode.body_hash}|captures|{crawled_tree.uuid}', 1, f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
if urlnode.embedded_ressources:
for mimetype, blobs in urlnode.embedded_ressources.items():
for h, body in blobs:
pipeline.zincrby('body_hashes', 1, h)
pipeline.zincrby(f'bh|{h}', 1, urlnode.hostname)
pipeline.sadd(f'bh|{h}|captures', crawled_tree.uuid)
pipeline.zincrby(f'bh|{h}|captures|{crawled_tree.uuid}', 1,
f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
pipeline.execute()
@ -697,6 +705,31 @@ class Lookyloo():
for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
return captures, domains
def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> Dict[str, List[Tuple[str, str, str, str]]]:
    """Find the other captures in which the body identified by *blob_hash* was seen.

    The candidates come from ``self.indexing.get_body_hash_captures(blob_hash, url)``.
    The capture identified by *capture_uuid* is left out, as is any capture
    for which ``self.capture_cache`` returns nothing usable.

    Returns a dict with two keys, ``'same_url'`` and ``'different_url'``;
    each maps to a list of ``(capture uuid, url node uuid, capture title,
    hostname)`` tuples, sorted into one or the other by the flag the index
    reports for each hit.
    """
    same_url_hits: List[Tuple[str, str, str, str]] = []
    different_url_hits: List[Tuple[str, str, str, str]] = []

    hits = self.indexing.get_body_hash_captures(blob_hash, url)
    for other_capture_uuid, node_uuid, hostname, is_same_url in hits:
        if other_capture_uuid == capture_uuid:
            # The capture being displayed is not an "other" capture.
            continue
        cached = self.capture_cache(other_capture_uuid)
        if not cached:
            # Without a cache entry there is no title to show for this capture.
            continue
        entry = (other_capture_uuid, node_uuid, cached['title'], hostname)
        bucket = same_url_hits if is_same_url else different_url_hits
        bucket.append(entry)

    return {'same_url': same_url_hits, 'different_url': different_url_hits}
def _format_sane_js_response(self, lookup_table: Dict, h: str) -> Optional[Union[str, Tuple]]:
if lookup_table.get(h):
if isinstance(lookup_table[h], list):
libname, version, path = lookup_table[h][0].split("|")
other_files = len(lookup_table[h])
return libname, version, path, other_files
else:
# Predefined generic file
return lookup_table[h]
return None
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
capture_dir = self.lookup_capture_dir(capture_uuid)
if not capture_dir:
@ -734,34 +767,37 @@ class Lookyloo():
if not url.empty_response:
# Index lookup
# %%% Full body %%%
freq = self.indexing.body_hash_fequency(url.body_hash)
to_append['body_hash_details'] = freq
if freq['hash_freq'] > 1:
to_append['body_hash_details'] = freq
to_append['body_hash_details']['other_captures'] = self.hash_lookup(url.body_hash, url.name, capture_uuid)
captures_list: Dict[str, List[Tuple[str, str, str, str]]] = {'same_url': [], 'different_url': []}
for h_capture_uuid, url_uuid, url_hostname, same_url in self.indexing.get_body_hash_captures(url.body_hash, url.name):
if h_capture_uuid == capture_uuid:
# Skip self.
continue
cache = self.capture_cache(h_capture_uuid)
if cache:
if same_url:
captures_list['same_url'].append((h_capture_uuid, url_uuid, cache['title'], url_hostname))
else:
captures_list['different_url'].append((h_capture_uuid, url_uuid, cache['title'], url_hostname))
to_append['body_hash_details']['other_captures'] = captures_list
# %%% Embedded ressources %%%
if url.embedded_ressources:
to_append['embedded_ressources'] = {}
for mimetype, blobs in url.embedded_ressources.items():
for h, blob in blobs:
if h in to_append['embedded_ressources']:
# Skip duplicates
continue
freq = self.indexing.body_hash_fequency(h)
to_append['embedded_ressources'][h] = freq
to_append['embedded_ressources'][h]['type'] = mimetype
if freq['hash_freq'] > 1:
to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid)
if hasattr(self, 'sanejs') and self.sanejs.available:
to_lookup = list(to_append['embedded_ressources'].keys())
sanejs_lookups_embedded = self.sanejs.hashes_lookup(to_lookup)
for h in to_append['embedded_ressources'].keys():
sane_js_match = self._format_sane_js_response(sanejs_lookups_embedded, h)
if sane_js_match:
to_append['embedded_ressources'][h]['sane_js'] = sane_js_match
# Optional: SaneJS information
if url.body_hash in sanejs_lookups:
if sanejs_lookups[url.body_hash]:
if isinstance(sanejs_lookups[url.body_hash], list):
libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
other_files = len(sanejs_lookups[url.body_hash])
to_append['sane_js'] = (libname, version, path, other_files)
else:
# Predefined generic file
to_append['sane_js'] = sanejs_lookups[url.body_hash]
sane_js_match = self._format_sane_js_response(sanejs_lookups, url.body_hash)
if sane_js_match:
to_append['sane_js'] = sane_js_match
# Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
if hasattr(url, 'cookies_sent'):

View File

@ -30,7 +30,7 @@
$(document).ready(function() {
// Copy to clipboard
// Grab any text in the attribute 'data-copy' and pass it to the copy function
$('.js-copy').tooltip();
$('.js-copy').tooltip();
$('.js-copy').click(function() {
var text = $(this).attr('data-copy');
var el = $(this);
@ -71,6 +71,7 @@
</div>
<ul class="list-group">
<li class="list-group-item">
{# Details of the response #}
<p class="h4">Response</p>
<div>
@ -171,6 +172,83 @@
<div>
{% endif %}
{# Embedded resources found in this response: one entry per blob hash #}
{% if url['embedded_ressources'] %}
<div><b>This response contains embedded resources</b></div>
<button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#embedded_full_list_{{ url['url_object'].uuid }}" aria-expanded="false" aria-controls="embedded_full_list_{{ url['url_object'].uuid }}">
  Toggle list of embedded resources.
</button>
<div class="collapse" id="embedded_full_list_{{ url['url_object'].uuid }}">
  <div class="card card-body">
    {% for hash, details in url['embedded_ressources'].items() %}
    {# Optional: SaneJS information, either a plain string or (libname, version, path, number of entries) #}
    {% if details['sane_js'] %}
    <div>
      {% if details['sane_js'] is string %}
      <b>{{ details['sane_js'] }} </b>
      {% else %}
      This file is known as part of <b>{{ details['sane_js'][0] }}</b>
      version <b>{{ details['sane_js'][1] }}</b>: <b>{{ details['sane_js'][2] }}</b>.
      {% if details['sane_js'][3] > 1 %}
      It is also present in <b>{{ details['sane_js'][3] - 1 }}</b> other libraries.
      {% endif %}
      {% endif %}
    </div>
    {% endif %}
    <div>
      This file (<b>{{ details['type'] }}</b>) can be found <b>{{ details['hash_freq'] }}</b> times
      across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
      <br>
      {# 'other_captures' is not always present in details, so fall back to empty lists #}
      {% set other_captures = details.get('other_captures', {}) %}
      {% set different_url = other_captures.get('different_url', []) %}
      {% set same_url = other_captures.get('same_url', []) %}
      {% set total_captures = different_url|length + same_url|length %}
      {% if total_captures > 0 %}
      <p>
        The same file was seen in <b>{{ total_captures }}</b> other captures.
        <button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#embeddedlist_{{ hash }}" aria-expanded="false" aria-controls="embeddedlist_{{ hash }}">
          Toggle list.
        </button>
      </p>
      <div class="collapse" id="embeddedlist_{{ hash }}">
        <div class="card card-body">
          {% if different_url|length > 0 %}
          <div>
            <p>The following captures get the same file from a <b>different URL</b></p>
            <ul>
              {% for capture_uuid, urlnode_uuid, title, hostname in different_url %}
              <li>
                <a href="{{ url_for('tree', tree_uuid=capture_uuid, urlnode_uuid=urlnode_uuid) }}">{{ title }}</a> - {{ hostname }}
              </li>
              {% endfor %}
            </ul>
          </div>
          {% endif %}
          {% if same_url|length > 0 %}
          <br>
          <div>
            <p>The following captures get the same file from the <b>same URL</b></p>
            <ul>
              {% for capture_uuid, urlnode_uuid, title, hostname in same_url %}
              <li>
                <a href="{{ url_for('tree', tree_uuid=capture_uuid, urlnode_uuid=urlnode_uuid) }}">{{ title }}</a> - {{ hostname }}
              </li>
              {% endfor %}
            </ul>
          </div>
          {% endif %}
        </div>
      </div>
      {% elif details['hash_freq'] > 1 %}
      {# Counted more than once, but no other capture is listed #}
      <p>This file is loaded multiple times in this capture.</p>
      {% endif %}
      {# Link to the details of the embedded blob itself, not of the enclosing response #}
      <p><a href="{{ url_for('body_hash_details', body_hash=hash) }}">
        Show more information about this embedded content.
      </a></p>
    </div>
    {% endfor %}
  </div>
</div>
{% endif %}
{% if url['cookies_received'] %}
<div>
<p class="h5">Cookies</p>