mirror of https://github.com/CIRCL/lookyloo
new: Add visualisation for embedded resources.
parent
dab2c53269
commit
e5e4e4972e
|
@ -121,6 +121,14 @@ class Indexing():
|
||||||
pipeline.sadd(f'bh|{urlnode.body_hash}|captures', crawled_tree.uuid)
|
pipeline.sadd(f'bh|{urlnode.body_hash}|captures', crawled_tree.uuid)
|
||||||
# ZSet of all urlnode_UUIDs|full_url
|
# ZSet of all urlnode_UUIDs|full_url
|
||||||
pipeline.zincrby(f'bh|{urlnode.body_hash}|captures|{crawled_tree.uuid}', 1, f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
pipeline.zincrby(f'bh|{urlnode.body_hash}|captures|{crawled_tree.uuid}', 1, f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
||||||
|
if urlnode.embedded_ressources:
|
||||||
|
for mimetype, blobs in urlnode.embedded_ressources.items():
|
||||||
|
for h, body in blobs:
|
||||||
|
pipeline.zincrby('body_hashes', 1, h)
|
||||||
|
pipeline.zincrby(f'bh|{h}', 1, urlnode.hostname)
|
||||||
|
pipeline.sadd(f'bh|{h}|captures', crawled_tree.uuid)
|
||||||
|
pipeline.zincrby(f'bh|{h}|captures|{crawled_tree.uuid}', 1,
|
||||||
|
f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
||||||
|
|
||||||
pipeline.execute()
|
pipeline.execute()
|
||||||
|
|
||||||
|
@ -697,6 +705,31 @@ class Lookyloo():
|
||||||
for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
|
for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
|
||||||
return captures, domains
|
return captures, domains
|
||||||
|
|
||||||
|
def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> Dict[str, List[Tuple[str, str, str, str]]]:
    """List the other captures in which the blob identified by *blob_hash* was seen.

    The entries come from ``self.indexing.get_body_hash_captures(blob_hash, url)``.
    The capture identified by *capture_uuid* is left out, as is every capture
    for which ``self.capture_cache`` returns nothing.

    Returns a dict with two keys, ``'same_url'`` and ``'different_url'``; each
    value is a list of ``(capture_uuid, urlnode_uuid, capture_title, hostname)``
    tuples, bucketed by the ``same_url`` flag reported by the index.
    """
    same_url_hits: List[Tuple[str, str, str, str]] = []
    different_url_hits: List[Tuple[str, str, str, str]] = []
    for other_uuid, url_uuid, url_hostname, same_url in self.indexing.get_body_hash_captures(blob_hash, url):
        if other_uuid == capture_uuid:
            # The capture being displayed is not an "other" capture.
            continue
        cache = self.capture_cache(other_uuid)
        if not cache:
            # No cache entry for that capture: no title to show, leave it out.
            continue
        bucket = same_url_hits if same_url else different_url_hits
        bucket.append((other_uuid, url_uuid, cache['title'], url_hostname))
    return {'same_url': same_url_hits, 'different_url': different_url_hits}
|
||||||
|
|
||||||
|
def _format_sane_js_response(self, lookup_table: Dict, h: str) -> Optional[Union[str, Tuple]]:
    """Turn the entry stored for hash *h* in *lookup_table* into a displayable value.

    Returns:
        * ``None`` when *h* is absent from the table or its entry is empty.
        * ``(libname, version, path, number_of_entries)`` when the entry is a
          list; the first three fields come from splitting the first element
          on ``"|"``.
        * The entry itself otherwise (predefined generic file).
    """
    entry = lookup_table.get(h)
    if not entry:
        return None
    if not isinstance(entry, list):
        # Predefined generic file
        return entry
    libname, version, path = entry[0].split("|")
    return libname, version, path, len(entry)
|
||||||
|
|
||||||
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
||||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||||
if not capture_dir:
|
if not capture_dir:
|
||||||
|
@ -734,34 +767,37 @@ class Lookyloo():
|
||||||
|
|
||||||
if not url.empty_response:
|
if not url.empty_response:
|
||||||
# Index lookup
|
# Index lookup
|
||||||
|
# %%% Full body %%%
|
||||||
freq = self.indexing.body_hash_fequency(url.body_hash)
|
freq = self.indexing.body_hash_fequency(url.body_hash)
|
||||||
|
to_append['body_hash_details'] = freq
|
||||||
if freq['hash_freq'] > 1:
|
if freq['hash_freq'] > 1:
|
||||||
to_append['body_hash_details'] = freq
|
to_append['body_hash_details']['other_captures'] = self.hash_lookup(url.body_hash, url.name, capture_uuid)
|
||||||
|
|
||||||
captures_list: Dict[str, List[Tuple[str, str, str, str]]] = {'same_url': [], 'different_url': []}
|
# %%% Embedded ressources %%%
|
||||||
for h_capture_uuid, url_uuid, url_hostname, same_url in self.indexing.get_body_hash_captures(url.body_hash, url.name):
|
if url.embedded_ressources:
|
||||||
if h_capture_uuid == capture_uuid:
|
to_append['embedded_ressources'] = {}
|
||||||
# Skip self.
|
for mimetype, blobs in url.embedded_ressources.items():
|
||||||
continue
|
for h, blob in blobs:
|
||||||
cache = self.capture_cache(h_capture_uuid)
|
if h in to_append['embedded_ressources']:
|
||||||
if cache:
|
# Skip duplicates
|
||||||
if same_url:
|
continue
|
||||||
captures_list['same_url'].append((h_capture_uuid, url_uuid, cache['title'], url_hostname))
|
freq = self.indexing.body_hash_fequency(h)
|
||||||
else:
|
to_append['embedded_ressources'][h] = freq
|
||||||
captures_list['different_url'].append((h_capture_uuid, url_uuid, cache['title'], url_hostname))
|
to_append['embedded_ressources'][h]['type'] = mimetype
|
||||||
|
if freq['hash_freq'] > 1:
|
||||||
to_append['body_hash_details']['other_captures'] = captures_list
|
to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid)
|
||||||
|
if hasattr(self, 'sanejs') and self.sanejs.available:
|
||||||
|
to_lookup = list(to_append['embedded_ressources'].keys())
|
||||||
|
sanejs_lookups_embedded = self.sanejs.hashes_lookup(to_lookup)
|
||||||
|
for h in to_append['embedded_ressources'].keys():
|
||||||
|
sane_js_match = self._format_sane_js_response(sanejs_lookups_embedded, h)
|
||||||
|
if sane_js_match:
|
||||||
|
to_append['embedded_ressources'][h]['sane_js'] = sane_js_match
|
||||||
|
|
||||||
# Optional: SaneJS information
|
# Optional: SaneJS information
|
||||||
if url.body_hash in sanejs_lookups:
|
sane_js_match = self._format_sane_js_response(sanejs_lookups, url.body_hash)
|
||||||
if sanejs_lookups[url.body_hash]:
|
if sane_js_match:
|
||||||
if isinstance(sanejs_lookups[url.body_hash], list):
|
to_append['sane_js'] = sane_js_match
|
||||||
libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
|
|
||||||
other_files = len(sanejs_lookups[url.body_hash])
|
|
||||||
to_append['sane_js'] = (libname, version, path, other_files)
|
|
||||||
else:
|
|
||||||
# Predefined generic file
|
|
||||||
to_append['sane_js'] = sanejs_lookups[url.body_hash]
|
|
||||||
|
|
||||||
# Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
|
# Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
|
||||||
if hasattr(url, 'cookies_sent'):
|
if hasattr(url, 'cookies_sent'):
|
||||||
|
|
|
@ -30,7 +30,7 @@
|
||||||
$(document).ready(function() {
|
$(document).ready(function() {
|
||||||
// Copy to clipboard
|
// Copy to clipboard
|
||||||
// Grab any text in the attribute 'data-copy' and pass it to the copy function
|
// Grab any text in the attribute 'data-copy' and pass it to the copy function
|
||||||
$('.js-copy').tooltip();
|
$('.js-copy').tooltip();
|
||||||
$('.js-copy').click(function() {
|
$('.js-copy').click(function() {
|
||||||
var text = $(this).attr('data-copy');
|
var text = $(this).attr('data-copy');
|
||||||
var el = $(this);
|
var el = $(this);
|
||||||
|
@ -71,6 +71,7 @@
|
||||||
</div>
|
</div>
|
||||||
<ul class="list-group">
|
<ul class="list-group">
|
||||||
<li class="list-group-item">
|
<li class="list-group-item">
|
||||||
|
{# Details of the response #}
|
||||||
<p class="h4">Response</p>
|
<p class="h4">Response</p>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
|
@ -171,6 +172,83 @@
|
||||||
<div>
|
<div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{# Embedded resources of this response: one entry per hash in url['embedded_ressources'],
   with its frequency, optional SaneJS match and the other captures containing the same blob. #}
{% if url['embedded_ressources'] %}
<div><b>This response contains embedded ressources</b></div>
<button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#embedded_full_list_{{ url['url_object'].uuid }}" aria-expanded="false" aria-controls="embedded_full_list_{{ url['url_object'].uuid }}">
  Toggle list of embedded resources.
</button>
<div class="collapse" id="embedded_full_list_{{ url['url_object'].uuid }}">
  <div class="card card-body">
    {% for hash, details in url['embedded_ressources'].items() %}
    {# Element ids are scoped by the URL node uuid: the same blob can be embedded in several
       responses rendered on this page, and duplicate ids would break the collapse toggle. #}
    {% set embedded_list_id = 'embeddedlist_' ~ url['url_object'].uuid ~ '_' ~ hash %}
    {% if details['sane_js'] %}
    <div>
      {% if details['sane_js'] is string %}
      <b>{{ details['sane_js'] }} </b>
      {% else %}
      This file is known as part of <b>{{ details['sane_js'][0] }}</b>
      version <b>{{ details['sane_js'][1] }}</b>: <b>{{ details['sane_js'][2] }}</b>.
      {% if details['sane_js'][3] > 1 %}
      It is also present in <b>{{ details['sane_js'][3] - 1 }}</b> other libraries.
      {% endif %}
      {% endif %}
    </div>
    {% endif %}
    <div>
      This file (<b>{{ details['type'] }}</b>) can be found <b>{{ details['hash_freq'] }}</b> times
      across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
      <br>

      {# 'other_captures' is missing when the hash was seen only once; fall back to empty
         containers so the subscripts below never hit an undefined value. #}
      {% set other_captures = details['other_captures'] or {} %}
      {% set different_url_captures = other_captures['different_url'] or [] %}
      {% set same_url_captures = other_captures['same_url'] or [] %}
      {% set total_captures = different_url_captures|length + same_url_captures|length %}

      {% if total_captures > 0 %}
      <p>
        The same file was seen in <b>{{ total_captures }}</b> other captures.
        <button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#{{ embedded_list_id }}" aria-expanded="false" aria-controls="{{ embedded_list_id }}">
          Toggle list.
        </button>
      </p>
      <div class="collapse" id="{{ embedded_list_id }}">
        <div class="card card-body">
          {% if different_url_captures|length > 0 %}
          <div>
            <p>The following captures get the same file from a <b>different URL</b></p>
            <ul>
              {% for capture_uuid, urlnode_uuid, title, hostname in different_url_captures %}
              <li>
                <a href="{{ url_for('tree', tree_uuid=capture_uuid, urlnode_uuid=urlnode_uuid) }}">{{ title }}</a> - {{ hostname }}
              </li>
              {% endfor %}
            </ul>
          </div>
          {% endif %}

          {% if same_url_captures|length > 0 %}
          <br>
          <div>
            <p>The following captures get the same file from the <b>same URL</b></p>
            <ul>
              {% for capture_uuid, urlnode_uuid, title, hostname in same_url_captures %}
              <li>
                <a href="{{ url_for('tree', tree_uuid=capture_uuid, urlnode_uuid=urlnode_uuid) }}">{{ title }}</a> - {{ hostname }}
              </li>
              {% endfor %}
            </ul>
          </div>
          {% endif %}
        </div>
      </div>
      {% else %}
      <p>This file is loaded multiple times in this capture.</p>
      {% endif %}
      {# Link to the details of the embedded blob itself, not of the enclosing response body. #}
      <p><a href="{{ url_for('body_hash_details', body_hash=hash) }}">
        Show more information about this embedded content.
      </a></p>
    </div>
    {% endfor %}
  </div>
</div>
{% endif %}
|
||||||
|
|
||||||
{% if url['cookies_received'] %}
|
{% if url['cookies_received'] %}
|
||||||
<div>
|
<div>
|
||||||
<p class="h5">Cookies</p>
|
<p class="h5">Cookies</p>
|
||||||
|
|
Loading…
Reference in New Issue