mirror of https://github.com/CIRCL/lookyloo
chg: Cleanup and improve index rendering
parent
7772706262
commit
29c78d3485
|
@ -22,4 +22,6 @@ if __name__ == '__main__':
|
|||
|
||||
indexing = Indexing()
|
||||
indexing.clear_indexes()
|
||||
indexing.index_all()
|
||||
for capture_dir in lookyloo.capture_dirs:
|
||||
indexing.index_cookies_capture(capture_dir)
|
||||
indexing.index_body_hashes_capture(capture_dir)
|
||||
|
|
|
@ -35,22 +35,11 @@ from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
|
|||
class Indexing():
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.lookyloo = Lookyloo()
|
||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)
|
||||
|
||||
def clear_indexes(self):
|
||||
self.redis.flushdb()
|
||||
|
||||
def index_all(self):
|
||||
self.index_cookies()
|
||||
self.index_body_hashes()
|
||||
|
||||
def get_capture_cache(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
|
||||
capture_dir = self.lookyloo.lookup_capture_dir(capture_uuid)
|
||||
if capture_dir:
|
||||
return self.lookyloo.capture_cache(capture_dir)
|
||||
return {}
|
||||
|
||||
# ###### Cookies ######
|
||||
|
||||
@property
|
||||
|
@ -72,7 +61,7 @@ class Indexing():
|
|||
def index_cookies_capture(self, capture_dir: Path) -> None:
|
||||
print(f'Index cookies {capture_dir}')
|
||||
try:
|
||||
crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
|
||||
crawled_tree = Lookyloo.get_crawled_tree(capture_dir)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return
|
||||
|
@ -101,10 +90,6 @@ class Indexing():
|
|||
pipeline.sadd(domain, name)
|
||||
pipeline.execute()
|
||||
|
||||
def index_cookies(self) -> None:
|
||||
for capture_dir in self.lookyloo.capture_dirs:
|
||||
self.index_cookies_capture(capture_dir)
|
||||
|
||||
def aggregate_domain_cookies(self):
|
||||
psl = publicsuffix2.PublicSuffixList()
|
||||
pipeline = self.redis.pipeline()
|
||||
|
@ -130,7 +115,7 @@ class Indexing():
|
|||
def index_body_hashes_capture(self, capture_dir: Path) -> None:
|
||||
print(f'Index body hashes {capture_dir}')
|
||||
try:
|
||||
crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
|
||||
crawled_tree = Lookyloo.get_crawled_tree(capture_dir)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return
|
||||
|
@ -152,20 +137,13 @@ class Indexing():
|
|||
|
||||
pipeline.execute()
|
||||
|
||||
def index_body_hashes(self) -> None:
|
||||
for capture_dir in self.lookyloo.capture_dirs:
|
||||
self.index_body_hashes_capture(capture_dir)
|
||||
|
||||
def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None) -> List[str]:
|
||||
if not filter_url:
|
||||
return self.redis.smembers(f'bh|{body_hash}|captures')
|
||||
# We only want the captures if the hash match on a different URL
|
||||
def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None) -> List[Tuple[str, str, str]]:
|
||||
to_return = []
|
||||
for capture_uuid in self.redis.smembers(f'bh|{body_hash}|captures'):
|
||||
for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
|
||||
url_uuid, url = entry.split('|', 1)
|
||||
if url != filter_url:
|
||||
to_return.append(capture_uuid)
|
||||
if filter_url is None or url != filter_url:
|
||||
to_return.append((capture_uuid, url_uuid, urlsplit(url).hostname))
|
||||
return to_return
|
||||
|
||||
def get_body_hash_domains(self, body_hash: str) -> List[Tuple[str, float]]:
|
||||
|
@ -283,12 +261,6 @@ class Lookyloo():
|
|||
ct = self.get_crawled_tree(capture_uuid)
|
||||
return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
|
||||
|
||||
def remove_pickle(self, capture_uuid: str) -> None:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
remove_pickle_tree(capture_dir)
|
||||
|
||||
def rebuild_cache(self) -> None:
|
||||
self.redis.flushdb()
|
||||
self._init_existing_dumps()
|
||||
|
@ -708,6 +680,27 @@ class Lookyloo():
|
|||
self._set_capture_cache(dirpath)
|
||||
return perma_uuid
|
||||
|
||||
def get_body_hash_investigator(self, body_hash: str) -> Tuple[List[Tuple[str, str]], List[Tuple[str, float]]]:
|
||||
indexing = Indexing()
|
||||
captures = []
|
||||
for capture_uuid, url_uuid, url_hostname in indexing.get_body_hash_captures(body_hash):
|
||||
cache = self.get_capture_cache(capture_uuid)
|
||||
if cache:
|
||||
captures.append((capture_uuid, cache['title']))
|
||||
domains = indexing.get_body_hash_domains(body_hash)
|
||||
return captures, domains
|
||||
|
||||
def get_cookie_name_investigator(self, cookie_name: str):
|
||||
indexing = Indexing()
|
||||
captures = []
|
||||
for capture_uuid, url_uuid in indexing.get_cookies_names_captures(cookie_name):
|
||||
cache = self.get_capture_cache(capture_uuid)
|
||||
if cache:
|
||||
captures.append((capture_uuid, cache['title']))
|
||||
domains = [(domain, freq, indexing.cookies_names_domains_values(cookie_name, domain))
|
||||
for domain, freq in indexing.get_cookie_domains(cookie_name)]
|
||||
return captures, domains
|
||||
|
||||
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
|
@ -750,8 +743,14 @@ class Lookyloo():
|
|||
freq = indexing.body_hash_fequency(url.body_hash)
|
||||
if freq['hash_freq'] > 1:
|
||||
to_append['body_hash_details'] = freq
|
||||
to_append['body_hash_details']['other_captures'] = [indexing.get_capture_cache(capture)
|
||||
for capture in indexing.get_body_hash_captures(url.body_hash, url.name)]
|
||||
|
||||
captures_list: List[Tuple[str, str, str]] = []
|
||||
for capture_uuid, url_uuid, url_hostname in indexing.get_body_hash_captures(url.body_hash, url.name):
|
||||
cache = self.get_capture_cache(capture_uuid)
|
||||
if cache:
|
||||
captures_list.append((capture_uuid, cache['title'], url_hostname))
|
||||
|
||||
to_append['body_hash_details']['other_captures'] = captures_list
|
||||
|
||||
# Optional: SaneJS information
|
||||
if url.body_hash in sanejs_lookups:
|
||||
|
|
|
@ -430,18 +430,13 @@ def cookies_lookup():
|
|||
|
||||
@app.route('/cookies/<string:cookie_name>', methods=['GET'])
|
||||
def cookies_name_detail(cookie_name: str):
|
||||
i = Indexing()
|
||||
captures = [i.get_capture_cache(capture) for capture, url in i.get_cookies_names_captures(cookie_name)]
|
||||
domains = [(domain, freq, i.cookies_names_domains_values(cookie_name, domain))
|
||||
for domain, freq in i.get_cookie_domains(cookie_name)]
|
||||
captures, domains = lookyloo.get_cookie_name_investigator(cookie_name)
|
||||
return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains, captures=captures)
|
||||
|
||||
|
||||
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
||||
def body_hash_details(body_hash: str):
|
||||
i = Indexing()
|
||||
captures = [i.get_capture_cache(capture) for capture in i.get_body_hash_captures(body_hash)]
|
||||
domains = i.get_body_hash_domains(body_hash)
|
||||
captures, domains = lookyloo.get_body_hash_investigator(body_hash)
|
||||
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures)
|
||||
|
||||
# Query API
|
||||
|
|
|
@ -46,8 +46,8 @@
|
|||
</div>
|
||||
<p>The same file was seen in these captures:</p>
|
||||
<ul>
|
||||
{% for capture in captures %}
|
||||
<li><a href="{{ url_for('tree', tree_uuid=capture['uuid']) }}">{{ capture['title'] }}</a></li>
|
||||
{% for capture_uuid, title in captures %}
|
||||
<li><a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">{{ title }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endblock %}
|
||||
|
|
|
@ -54,8 +54,8 @@
|
|||
</div>
|
||||
<p>A cookie with that name was seen in these captures:</p>
|
||||
<ul>
|
||||
{% for capture in captures %}
|
||||
<li><a href="{{ url_for('tree', tree_uuid=capture['uuid']) }}">{{ capture['title'] }}</a></li>
|
||||
{% for capture_uuid, title in captures %}
|
||||
<li><a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">{{ title }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endblock %}
|
||||
|
|
|
@ -103,22 +103,47 @@
|
|||
</div>
|
||||
|
||||
|
||||
{% if url['sane_js'] %}
|
||||
<div>
|
||||
{% if url['sane_js'] is string %}
|
||||
<b>{{ url['sane_js'] }} </b>
|
||||
{% else %}
|
||||
This file is known as part of <b>{{ url['sane_js'][0] }}</b>
|
||||
version <b>{{ url['sane_js'][1] }}</b>: <b>{{ url['sane_js'][2] }}</b>.
|
||||
{% if url['sane_js'][3] > 1%}
|
||||
It is also present in <b>{{ url['sane_js'][3] -1 }}</b> other libraries.
|
||||
{%endif%}
|
||||
{%endif%}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% if url['body_hash_details'] %}
|
||||
<div>
|
||||
This file can be found <b>{{ url['body_hash_details']['hash_freq'] }}</b> times
|
||||
across all the captures on this lookyloo instance, in <b>{{ url['body_hash_details']['hash_domains_freq'] }}</b> unique domains.
|
||||
</br>
|
||||
{% if url['body_hash_details']['other_captures'] %}
|
||||
<p>The same file was seen in these captures:</p>
|
||||
<ul>
|
||||
{% for capture in url['body_hash_details']['other_captures'] %}
|
||||
<li><a href="{{ url_for('tree', tree_uuid=capture['uuid']) }}">{{ capture['title'] }}</a></li>
|
||||
<p>
|
||||
The same file was seen in <b>{{ url['body_hash_details']['other_captures']|length }}</b> other captures.
|
||||
<button class="btn btn-primary" type="button" data-toggle="collapse" data-target="#captureslist_{{ url['url_object'].uuid }}" aria-expanded="false" aria-controls="collapseExample">
|
||||
Toggle list.
|
||||
</button>
|
||||
</p>
|
||||
<div class="collapse" id="captureslist_{{ url['url_object'].uuid }}">
|
||||
<div class="card card-body">
|
||||
<ul>
|
||||
{% for capture_uuid, title, hostname in url['body_hash_details']['other_captures'] %}
|
||||
<li><a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">{{ title }}</a> - {{ hostname }} </li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
{% else %}
|
||||
<p>All the captures are loading it from the same URL.</p>
|
||||
{% endif %}
|
||||
<a href="{{ url_for('body_hash_details', body_hash=url['url_object'].body_hash) }}">
|
||||
Show details
|
||||
</a>
|
||||
<p><a href="{{ url_for('body_hash_details', body_hash=url['url_object'].body_hash) }}">
|
||||
Show more information about this response body.
|
||||
</a></p>
|
||||
<div>
|
||||
{% endif %}
|
||||
{% if url['sane_js'] %}
|
||||
|
@ -135,7 +160,6 @@
|
|||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
{% if url['cookies_received'] %}
|
||||
<div>
|
||||
<p class="h5">Cookies</p>
|
||||
|
|
Loading…
Reference in New Issue