mirror of https://github.com/CIRCL/lookyloo
new: Add visualisation for embedded resources.
parent
dab2c53269
commit
e5e4e4972e
|
@ -121,6 +121,14 @@ class Indexing():
|
|||
pipeline.sadd(f'bh|{urlnode.body_hash}|captures', crawled_tree.uuid)
|
||||
# ZSet of all urlnode_UUIDs|full_url
|
||||
pipeline.zincrby(f'bh|{urlnode.body_hash}|captures|{crawled_tree.uuid}', 1, f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
||||
if urlnode.embedded_ressources:
|
||||
for mimetype, blobs in urlnode.embedded_ressources.items():
|
||||
for h, body in blobs:
|
||||
pipeline.zincrby('body_hashes', 1, h)
|
||||
pipeline.zincrby(f'bh|{h}', 1, urlnode.hostname)
|
||||
pipeline.sadd(f'bh|{h}|captures', crawled_tree.uuid)
|
||||
pipeline.zincrby(f'bh|{h}|captures|{crawled_tree.uuid}', 1,
|
||||
f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
||||
|
||||
pipeline.execute()
|
||||
|
||||
|
@ -697,6 +705,31 @@ class Lookyloo():
|
|||
for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
|
||||
return captures, domains
|
||||
|
||||
def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> Dict[str, List[Tuple[str, str, str, str]]]:
    """Collect the other captures in which the blob *blob_hash* was seen.

    Every entry returned by ``self.indexing.get_body_hash_captures`` is
    sorted into one of two buckets depending on its ``same_url`` flag.

    :param blob_hash: Hash of the body (or embedded resource) to look up.
    :param url: URL passed through to the index lookup together with the hash.
    :param capture_uuid: UUID of the capture being displayed; entries that
        belong to it are left out of the result.
    :return: ``{'same_url': [...], 'different_url': [...]}`` where each item
        is ``(capture_uuid, urlnode_uuid, capture_title, hostname)``.
    """
    captures_list: Dict[str, List[Tuple[str, str, str, str]]] = {'same_url': [], 'different_url': []}
    matches = self.indexing.get_body_hash_captures(blob_hash, url)
    for other_uuid, url_uuid, url_hostname, same_url in matches:
        if other_uuid == capture_uuid:
            # The capture being displayed is not an "other" capture.
            continue
        cache = self.capture_cache(other_uuid)
        if not cache:
            # Without a cache entry there is no title to show.
            continue
        bucket = 'same_url' if same_url else 'different_url'
        captures_list[bucket].append((other_uuid, url_uuid, cache['title'], url_hostname))
    return captures_list
|
||||
|
||||
def _format_sane_js_response(self, lookup_table: Dict, h: str) -> Optional[Union[str, Tuple]]:
|
||||
if lookup_table.get(h):
|
||||
if isinstance(lookup_table[h], list):
|
||||
libname, version, path = lookup_table[h][0].split("|")
|
||||
other_files = len(lookup_table[h])
|
||||
return libname, version, path, other_files
|
||||
else:
|
||||
# Predefined generic file
|
||||
return lookup_table[h]
|
||||
return None
|
||||
|
||||
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
|
@ -734,34 +767,37 @@ class Lookyloo():
|
|||
|
||||
if not url.empty_response:
|
||||
# Index lookup
|
||||
# %%% Full body %%%
|
||||
freq = self.indexing.body_hash_fequency(url.body_hash)
|
||||
to_append['body_hash_details'] = freq
|
||||
if freq['hash_freq'] > 1:
|
||||
to_append['body_hash_details'] = freq
|
||||
to_append['body_hash_details']['other_captures'] = self.hash_lookup(url.body_hash, url.name, capture_uuid)
|
||||
|
||||
captures_list: Dict[str, List[Tuple[str, str, str, str]]] = {'same_url': [], 'different_url': []}
|
||||
for h_capture_uuid, url_uuid, url_hostname, same_url in self.indexing.get_body_hash_captures(url.body_hash, url.name):
|
||||
if h_capture_uuid == capture_uuid:
|
||||
# Skip self.
|
||||
continue
|
||||
cache = self.capture_cache(h_capture_uuid)
|
||||
if cache:
|
||||
if same_url:
|
||||
captures_list['same_url'].append((h_capture_uuid, url_uuid, cache['title'], url_hostname))
|
||||
else:
|
||||
captures_list['different_url'].append((h_capture_uuid, url_uuid, cache['title'], url_hostname))
|
||||
|
||||
to_append['body_hash_details']['other_captures'] = captures_list
|
||||
# %%% Embedded ressources %%%
|
||||
if url.embedded_ressources:
|
||||
to_append['embedded_ressources'] = {}
|
||||
for mimetype, blobs in url.embedded_ressources.items():
|
||||
for h, blob in blobs:
|
||||
if h in to_append['embedded_ressources']:
|
||||
# Skip duplicates
|
||||
continue
|
||||
freq = self.indexing.body_hash_fequency(h)
|
||||
to_append['embedded_ressources'][h] = freq
|
||||
to_append['embedded_ressources'][h]['type'] = mimetype
|
||||
if freq['hash_freq'] > 1:
|
||||
to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid)
|
||||
if hasattr(self, 'sanejs') and self.sanejs.available:
|
||||
to_lookup = list(to_append['embedded_ressources'].keys())
|
||||
sanejs_lookups_embedded = self.sanejs.hashes_lookup(to_lookup)
|
||||
for h in to_append['embedded_ressources'].keys():
|
||||
sane_js_match = self._format_sane_js_response(sanejs_lookups_embedded, h)
|
||||
if sane_js_match:
|
||||
to_append['embedded_ressources'][h]['sane_js'] = sane_js_match
|
||||
|
||||
# Optional: SaneJS information
|
||||
if url.body_hash in sanejs_lookups:
|
||||
if sanejs_lookups[url.body_hash]:
|
||||
if isinstance(sanejs_lookups[url.body_hash], list):
|
||||
libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
|
||||
other_files = len(sanejs_lookups[url.body_hash])
|
||||
to_append['sane_js'] = (libname, version, path, other_files)
|
||||
else:
|
||||
# Predefined generic file
|
||||
to_append['sane_js'] = sanejs_lookups[url.body_hash]
|
||||
sane_js_match = self._format_sane_js_response(sanejs_lookups, url.body_hash)
|
||||
if sane_js_match:
|
||||
to_append['sane_js'] = sane_js_match
|
||||
|
||||
# Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
|
||||
if hasattr(url, 'cookies_sent'):
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
$(document).ready(function() {
|
||||
// Copy to clipboard
|
||||
// Grab any text in the attribute 'data-copy' and pass it to the copy function
|
||||
$('.js-copy').tooltip();
|
||||
$('.js-copy').tooltip();
|
||||
$('.js-copy').click(function() {
|
||||
var text = $(this).attr('data-copy');
|
||||
var el = $(this);
|
||||
|
@ -71,6 +71,7 @@
|
|||
</div>
|
||||
<ul class="list-group">
|
||||
<li class="list-group-item">
|
||||
{# Details of the response #}
|
||||
<p class="h4">Response</p>
|
||||
|
||||
<div>
|
||||
|
@ -171,6 +172,83 @@
|
|||
<div>
|
||||
{% endif %}
|
||||
|
||||
{% if url['embedded_ressources'] %}
|
||||
<div><b>This response contains embedded resources</b></div>
|
||||
<button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#embedded_full_list_{{ url['url_object'].uuid }}" aria-expanded="false" aria-controls="collapseExample">
|
||||
Toggle list of embedded resources.
|
||||
</button>
|
||||
<div class="collapse" id="embedded_full_list_{{ url['url_object'].uuid }}">
|
||||
<div class="card card-body">
|
||||
{% for hash, details in url['embedded_ressources'].items() %}
|
||||
{% if details['sane_js'] %}
|
||||
<div>
|
||||
{% if details['sane_js'] is string %}
|
||||
<b>{{ details['sane_js'] }} </b>
|
||||
{% else %}
|
||||
This file is known as part of <b>{{ details['sane_js'][0] }}</b>
|
||||
version <b>{{ details['sane_js'][1] }}</b>: <b>{{ details['sane_js'][2] }}</b>.
|
||||
{% if details['sane_js'][3] > 1%}
|
||||
It is also present in <b>{{ details['sane_js'][3] -1 }}</b> other libraries.
|
||||
{%endif%}
|
||||
{%endif%}
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
This file (<b>{{ details['type'] }}</b>) can be found <b>{{ details['hash_freq'] }}</b> times
|
||||
across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
|
||||
<br>
|
||||
|
||||
{% set total_captures = details['other_captures']['different_url']|length + details['other_captures']['same_url']|length %}
|
||||
|
||||
{% if total_captures > 0 %}
|
||||
<p>
|
||||
The same file was seen in <b>{{ total_captures }}</b> other captures.
|
||||
<button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#embeddedlist_{{ hash }}" aria-expanded="false" aria-controls="collapseExample">
|
||||
Toggle list.
|
||||
</button>
|
||||
</p>
|
||||
<div class="collapse" id="embeddedlist_{{ hash }}">
|
||||
<div class="card card-body">
|
||||
{% if details['other_captures']['different_url']|length > 0 %}
|
||||
<div>
|
||||
<p>The following captures get the same file from a <b>different URL</b></p>
|
||||
<ul>
|
||||
{% for capture_uuid, urlnode_uuid, title, hostname in details['other_captures']['different_url'] %}
|
||||
<li>
|
||||
<a href="{{ url_for('tree', tree_uuid=capture_uuid, urlnode_uuid=urlnode_uuid) }}">{{ title }}</a> - {{ hostname }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if details['other_captures']['same_url']|length > 0 %}
|
||||
<br>
|
||||
<div>
|
||||
<p>The following captures get the same file from the <b>same URL</b></p>
|
||||
<ul>
|
||||
{% for capture_uuid, urlnode_uuid, title, hostname in details['other_captures']['same_url'] %}
|
||||
<li>
|
||||
<a href="{{ url_for('tree', tree_uuid=capture_uuid, urlnode_uuid=urlnode_uuid) }}">{{ title }}</a> - {{ hostname }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% else %}
|
||||
<p>This file is loaded multiple times in this capture.</p>
|
||||
{% endif %}
|
||||
<p><a href="{{ url_for('body_hash_details', body_hash=url['url_object'].body_hash) }}">
|
||||
Show more information about this embedded content.
|
||||
</a></p>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if url['cookies_received'] %}
|
||||
<div>
|
||||
<p class="h5">Cookies</p>
|
||||
|
|
Loading…
Reference in New Issue