mirror of https://github.com/CIRCL/lookyloo
new: Download ressource from ressources page
parent
7ed4733797
commit
407a9a5511
|
@ -107,6 +107,13 @@ class Indexing():
|
||||||
|
|
||||||
# ###### Body hashes ######
|
# ###### Body hashes ######
|
||||||
|
|
||||||
|
@property
def ressources(self) -> List[Tuple[str, float]]:
    """Return the highest-scored body hashes together with their scores.

    Queries the ``body_hashes`` sorted set in descending score order for
    ranks 0 through 200 (Redis treats both bounds as inclusive) and asks
    for the score to be returned alongside each member.
    """
    top_body_hashes = self.redis.zrevrange('body_hashes', 0, 200, withscores=True)
    return top_body_hashes
|
||||||
|
|
||||||
|
def ressources_number_domains(self, h: str) -> int:
    """Return how many members the ``bh|<h>`` sorted set holds.

    The indexing code adds one member per hostname serving the body hash
    *h*, so the cardinality is the number of distinct hostnames seen.
    """
    hostnames_key = f'bh|{h}'
    return self.redis.zcard(hostnames_key)
|
||||||
|
|
||||||
def body_hash_fequency(self, body_hash: str) -> Dict[str, float]:
    """Return the frequency counters stored for *body_hash*.

    ``hash_freq`` is the score of the hash in the ``body_hashes`` sorted
    set; ``hash_domains_freq`` is the cardinality of the ``bh|<hash>``
    sorted set.
    """
    occurrences = self.redis.zscore('body_hashes', body_hash)
    nb_hostnames = self.redis.zcard(f'bh|{body_hash}')
    return {'hash_freq': occurrences, 'hash_domains_freq': nb_hostnames}
|
||||||
|
@ -119,25 +126,22 @@ class Indexing():
|
||||||
|
|
||||||
pipeline = self.redis.pipeline()
|
pipeline = self.redis.pipeline()
|
||||||
for urlnode in crawled_tree.root_hartree.url_tree.traverse():
|
for urlnode in crawled_tree.root_hartree.url_tree.traverse():
|
||||||
if urlnode.empty_response:
|
for h in urlnode.resources_hashes:
|
||||||
continue
|
pipeline.zincrby('body_hashes', 1, h)
|
||||||
pipeline.zincrby('body_hashes', 1, urlnode.body_hash)
|
pipeline.zincrby(f'bh|{h}', 1, urlnode.hostname)
|
||||||
pipeline.zincrby(f'bh|{urlnode.body_hash}', 1, urlnode.hostname)
|
# set of all captures with this hash
|
||||||
# set of all captures with this hash
|
pipeline.sadd(f'bh|{h}|captures', crawled_tree.uuid)
|
||||||
pipeline.sadd(f'bh|{urlnode.body_hash}|captures', crawled_tree.uuid)
|
# ZSet of all urlnode_UUIDs|full_url
|
||||||
# ZSet of all urlnode_UUIDs|full_url
|
pipeline.zincrby(f'bh|{h}|captures|{crawled_tree.uuid}', 1, f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
||||||
pipeline.zincrby(f'bh|{urlnode.body_hash}|captures|{crawled_tree.uuid}', 1, f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
|
||||||
if hasattr(urlnode, 'embedded_ressources') and urlnode.embedded_ressources:
|
|
||||||
for mimetype, blobs in urlnode.embedded_ressources.items():
|
|
||||||
for h, body in blobs:
|
|
||||||
pipeline.zincrby('body_hashes', 1, h)
|
|
||||||
pipeline.zincrby(f'bh|{h}', 1, urlnode.hostname)
|
|
||||||
pipeline.sadd(f'bh|{h}|captures', crawled_tree.uuid)
|
|
||||||
pipeline.zincrby(f'bh|{h}|captures|{crawled_tree.uuid}', 1,
|
|
||||||
f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
|
|
||||||
|
|
||||||
pipeline.execute()
|
pipeline.execute()
|
||||||
|
|
||||||
|
def get_hash_uuids(self, body_hash: str) -> Tuple[str, str]:
    """Pick one capture, and one URL node inside it, that contain *body_hash*.

    A capture UUID is drawn at random from the ``bh|<hash>|captures`` set,
    then the first member of that capture's ``bh|<hash>|captures|<uuid>``
    sorted set is used to find the URL node.

    Returns:
        ``(capture_uuid, urlnode_uuid)``.

    Raises:
        IndexError: nothing is indexed for *body_hash*.
    """
    capture_uuid = self.redis.srandmember(f'bh|{body_hash}|captures')
    if capture_uuid is None:
        # Fail here rather than querying a key ending in "|None".
        raise IndexError(f'No capture indexed for body hash {body_hash}')

    # Only the first member is needed; both ZRANGE bounds are inclusive.
    entries = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 0)
    if not entries:
        raise IndexError(f'No URL node indexed for body hash {body_hash} in capture {capture_uuid}')

    # Members are stored as "<urlnode_uuid>|<hostnode_uuid>|<url>"; only the
    # leading field is needed, and the URL part may itself contain "|".
    urlnode_uuid = entries[0].split('|', 1)[0]
    return capture_uuid, urlnode_uuid
|
||||||
|
|
||||||
def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
|
def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
|
||||||
limit: int=20) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
|
limit: int=20) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
|
||||||
to_return: List[Tuple[str, str, str, bool]] = []
|
to_return: List[Tuple[str, str, str, bool]] = []
|
||||||
|
@ -208,9 +212,12 @@ class Context():
|
||||||
p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
|
p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
|
||||||
p.execute()
|
p.execute()
|
||||||
|
|
||||||
def find_known_content(self, har2tree_container: Union[CrawledTree, HostNode, URLNode]) -> Dict[str, Any]:
|
def find_known_content(self, har2tree_container: Union[CrawledTree, HostNode, URLNode, str]) -> Dict[str, Any]:
|
||||||
"""Return a dictionary of content resources found in the local known_content database, or in SaneJS (if enabled)"""
|
"""Return a dictionary of content resources found in the local known_content database, or in SaneJS (if enabled)"""
|
||||||
to_lookup: Set[str] = self._get_resources_hashes(har2tree_container)
|
if isinstance(har2tree_container, str):
|
||||||
|
to_lookup: Set[str] = {har2tree_container, }
|
||||||
|
else:
|
||||||
|
to_lookup: Set[str] = self._get_resources_hashes(har2tree_container)
|
||||||
known_content_table: Dict[str, Any] = {}
|
known_content_table: Dict[str, Any] = {}
|
||||||
if not to_lookup:
|
if not to_lookup:
|
||||||
return known_content_table
|
return known_content_table
|
||||||
|
@ -1093,6 +1100,22 @@ class Lookyloo():
|
||||||
|
|
||||||
return known, legitimate
|
return known, legitimate
|
||||||
|
|
||||||
|
def get_ressource(self, tree_uuid: str, urlnode_uuid: str, h: Optional[str]) -> Optional[Tuple[str, BytesIO]]:
    """Fetch a downloadable ressource attached to a URL node of a capture.

    Args:
        tree_uuid: UUID of the capture.
        urlnode_uuid: UUID of the URL node inside that capture.
        h: hash of the wanted ressource. When empty/None, or equal to the
            hash of the response body, the body itself is returned;
            otherwise the embedded ressource with that hash is looked up.

    Returns:
        ``(filename, content)``, or None when the response is empty or no
        ressource with hash *h* exists on the node.
    """
    url = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid)
    if url.empty_response:
        return None

    if not h or h == url.body_hash:
        # We want the body; fall back to a generic name when the node has none.
        return url.filename or 'file.bin', url.body

    # We want an embedded ressource.
    if h not in url.resources_hashes:
        return None
    for blobs in url.embedded_ressources.values():
        for ressource_h, blob in blobs:
            if ressource_h == h:
                return 'embedded_ressource.bin', blob

    # The hash is known to the node but matches none of the embedded blobs.
    return None
|
||||||
|
|
||||||
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
||||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||||
if not capture_dir:
|
if not capture_dir:
|
||||||
|
|
|
@ -248,42 +248,20 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
|
||||||
as_attachment=True, attachment_filename='posted_data.txt')
|
as_attachment=True, attachment_filename='posted_data.txt')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/embedded_ressource', methods=['POST'])
|
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
|
||||||
def get_embedded_ressource(tree_uuid: str, node_uuid: str):
|
def get_ressource(tree_uuid: str, node_uuid: str):
|
||||||
url = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
if request.method == 'POST':
|
||||||
h_request = request.form.get('ressource_hash')
|
h_request = request.form.get('ressource_hash')
|
||||||
for mimetype, blobs in url.embedded_ressources.items():
|
else:
|
||||||
for h, blob in blobs:
|
h_request = None
|
||||||
if h == h_request:
|
ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_request)
|
||||||
to_return = BytesIO()
|
|
||||||
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
|
||||||
zfile.writestr('file.bin', blob.getvalue())
|
|
||||||
to_return.seek(0)
|
|
||||||
return send_file(to_return, mimetype='application/zip',
|
|
||||||
as_attachment=True, attachment_filename='file.zip')
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>', methods=['GET'])
|
|
||||||
def urlnode_details(tree_uuid: str, node_uuid: str):
|
|
||||||
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
|
||||||
to_return = BytesIO()
|
to_return = BytesIO()
|
||||||
got_content = False
|
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
||||||
if hasattr(urlnode, 'body'):
|
if ressource:
|
||||||
body_content = urlnode.body.getvalue()
|
filename, r = ressource
|
||||||
if body_content:
|
zfile.writestr(filename, r.getvalue())
|
||||||
got_content = True
|
else:
|
||||||
if hasattr(urlnode, 'json') and urlnode.json:
|
zfile.writestr('file.txt', b'Unknown Hash')
|
||||||
try:
|
|
||||||
loaded = json.loads(body_content)
|
|
||||||
body_content = json.dumps(loaded, indent=2).encode()
|
|
||||||
except Exception:
|
|
||||||
# Not json, but junk
|
|
||||||
pass
|
|
||||||
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
|
||||||
zfile.writestr(urlnode.filename, body_content)
|
|
||||||
if not got_content:
|
|
||||||
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
|
||||||
zfile.writestr('file.txt', b'Response body empty')
|
|
||||||
to_return.seek(0)
|
to_return.seek(0)
|
||||||
return send_file(to_return, mimetype='application/zip',
|
return send_file(to_return, mimetype='application/zip',
|
||||||
as_attachment=True, attachment_filename='file.zip')
|
as_attachment=True, attachment_filename='file.zip')
|
||||||
|
@ -490,6 +468,18 @@ def cookies_lookup():
|
||||||
return render_template('cookies.html', cookies_names=cookies_names)
|
return render_template('cookies.html', cookies_names=cookies_names)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/ressources', methods=['GET'])
def ressources():
    """Render the ressources page listing the top body hashes.

    For every (hash, score) pair exposed by ``Indexing.ressources`` the row
    holds: the hash, its score, the number of hostnames it was seen on, the
    known-content entry for that hash (if any), and one capture / URL node
    pair where it can be found.
    """
    indexing = Indexing()
    rows = []
    for body_hash, frequency in indexing.ressources:
        nb_domains = indexing.ressources_number_domains(body_hash)
        known_content = lookyloo.context.find_known_content(body_hash)
        capture_uuid, urlnode_uuid = indexing.get_hash_uuids(body_hash)
        row = (body_hash, frequency, nb_domains, known_content.get(body_hash),
               capture_uuid, urlnode_uuid)
        rows.append(row)
    return render_template('ressources.html', ressources=rows)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/cookies/<string:cookie_name>', methods=['GET'])
|
@app.route('/cookies/<string:cookie_name>', methods=['GET'])
|
||||||
def cookies_name_detail(cookie_name: str):
|
def cookies_name_detail(cookie_name: str):
|
||||||
captures, domains = lookyloo.get_cookie_name_investigator(cookie_name)
|
captures, domains = lookyloo.get_cookie_name_investigator(cookie_name)
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
{% from "macros.html" import popup_icons %}
|
{% from "macros.html" import popup_icons %}
|
||||||
{% from "macros.html" import shorten_string %}
|
{% from "macros.html" import shorten_string %}
|
||||||
{% from "macros.html" import other_captures_table %}
|
{% from "macros.html" import other_captures_table %}
|
||||||
|
{% from "macros.html" import get_ressource_button %}
|
||||||
|
|
||||||
{% block title %}Details for {{ hostname }} {% endblock %}
|
{% block title %}Details for {{ hostname }} {% endblock %}
|
||||||
|
|
||||||
|
@ -220,9 +221,7 @@
|
||||||
<div>
|
<div>
|
||||||
This file (<b>{{ details['type'] }}</b>) can be found <b>{{ details['hash_freq'] }}</b> times
|
This file (<b>{{ details['type'] }}</b>) can be found <b>{{ details['hash_freq'] }}</b> times
|
||||||
across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
|
across all the captures on this lookyloo instance, in <b>{{ details['hash_domains_freq'] }}</b> unique domains.
|
||||||
<form method="post" action="{{ url_for('get_embedded_ressource', tree_uuid=tree_uuid, node_uuid=url['url_object'].uuid) }}">
|
{{ get_ressource_button(tree_uuid, url['url_object'].uuid, hash, 'Download the embedded ressource') }}
|
||||||
<button class="btn btn-primary" name="ressource_hash" value="{{ hash }}">Download the embedded ressource</button>
|
|
||||||
</form>
|
|
||||||
</br>
|
</br>
|
||||||
|
|
||||||
{% if 'other_captures' in details %}
|
{% if 'other_captures' in details %}
|
||||||
|
|
|
@ -12,6 +12,12 @@
|
||||||
</div>
|
</div>
|
||||||
{% endmacro %}
|
{% endmacro %}
|
||||||
|
|
||||||
|
{% macro get_ressource_button(capture_uuid, urlnode_uuid, hash, text) %}
|
||||||
|
<form method="post" action="{{ url_for('get_ressource', tree_uuid=capture_uuid, node_uuid=urlnode_uuid) }}">
|
||||||
|
<button class="btn btn-primary" name="ressource_hash" value="{{ hash }}">{{ text }}</button>
|
||||||
|
</form>
|
||||||
|
{% endmacro %}
|
||||||
|
|
||||||
{% macro ressource_legitimacy_details(details, ressource_size) %}
|
{% macro ressource_legitimacy_details(details, ressource_size) %}
|
||||||
{% if details and details[0] == False %}
|
{% if details and details[0] == False %}
|
||||||
<img src="/static/bomb.svg" title="Known malicious content in the response." width="21" height="21"/>
|
<img src="/static/bomb.svg" title="Known malicious content in the response." width="21" height="21"/>
|
||||||
|
@ -140,7 +146,7 @@ Body size: {{ sizeof_fmt(ressource_size) }}
|
||||||
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
|
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
|
||||||
</a>
|
</a>
|
||||||
{% elif key in ["js", "exe", "css", "font", "html", "json", "image", "video", "unknown_mimetype", "text", "unset_mimetype", "octet-stream", "livestream"] and not urlnode.empty_response %}
|
{% elif key in ["js", "exe", "css", "font", "html", "json", "image", "video", "unknown_mimetype", "text", "unset_mimetype", "octet-stream", "livestream"] and not urlnode.empty_response %}
|
||||||
<a href="{{ url_for('urlnode_details', tree_uuid=tree_uuid, node_uuid=urlnode.uuid) }}" title="Download the content of the response">
|
<a href="{{ url_for('get_ressource', tree_uuid=tree_uuid, node_uuid=urlnode.uuid) }}" title="Download the content of the response">
|
||||||
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
|
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
|
||||||
</a>
|
</a>
|
||||||
{% elif key == "redirect" %}
|
{% elif key == "redirect" %}
|
||||||
|
|
Loading…
Reference in New Issue