From 23419a31b97ad835b979571608338426b21ce90f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Mon, 29 Jun 2020 17:23:01 +0200
Subject: [PATCH] fix: cleanup
---
bin/rebuild_caches.py | 6 ++++--
lookyloo/lookyloo.py | 26 ++++-------------------
website/web/__init__.py | 2 +-
website/web/templates/hostname_popup.html | 13 ------------
4 files changed, 9 insertions(+), 38 deletions(-)
diff --git a/bin/rebuild_caches.py b/bin/rebuild_caches.py
index 027481e..060488f 100755
--- a/bin/rebuild_caches.py
+++ b/bin/rebuild_caches.py
@@ -4,6 +4,7 @@
import argparse
import logging
+from lookyloo.helpers import load_pickle_tree
from lookyloo.lookyloo import Lookyloo, Indexing
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
@@ -23,5 +24,6 @@ if __name__ == '__main__':
indexing = Indexing()
indexing.clear_indexes()
for capture_dir in lookyloo.capture_dirs:
- indexing.index_cookies_capture(capture_dir)
- indexing.index_body_hashes_capture(capture_dir)
+ tree = load_pickle_tree(capture_dir)
+ indexing.index_cookies_capture(tree)
+ indexing.index_body_hashes_capture(tree)
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index b38c90d..4e0bb0d 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -58,17 +58,7 @@ class Indexing():
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
- def index_cookies_capture(self, capture_dir: Path) -> None:
- print(f'Index cookies {capture_dir}')
- try:
- crawled_tree = load_pickle_tree(capture_dir)
- except Exception as e:
- print(e)
- return
-
- if not crawled_tree:
- return
-
+ def index_cookies_capture(self, crawled_tree: CrawledTree) -> None:
if self.redis.sismember('indexed_cookies', crawled_tree.uuid):
# Do not reindex
return
@@ -115,17 +105,7 @@ class Indexing():
return {'hash_freq': self.redis.zscore('body_hashes', body_hash),
'hash_domains_freq': self.redis.zcard(f'bh|{body_hash}')}
- def index_body_hashes_capture(self, capture_dir: Path) -> None:
- print(f'Index body hashes {capture_dir}')
- try:
- crawled_tree = load_pickle_tree(capture_dir)
- except Exception as e:
- print(e)
- return
-
- if not crawled_tree:
- return
-
+ def index_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
if self.redis.sismember('indexed_body_hashes', crawled_tree.uuid):
# Do not reindex
return
@@ -239,6 +219,8 @@ class Lookyloo():
har_files = sorted(capture_dir.glob('*.har'))
try:
ct = CrawledTree(har_files, uuid)
+ self.indexing.index_cookies_capture(ct)
+ self.indexing.index_body_hashes_capture(ct)
except Har2TreeError as e:
raise NoValidHarFile(e.message)
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 28ac601..814a93f 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -439,8 +439,8 @@ def body_hash_details(body_hash: str):
captures, domains = lookyloo.get_body_hash_investigator(body_hash)
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures)
-# Query API
+# Query API
@app.route('/json/<tree_uuid>/redirects', methods=['GET'])
def json_redirects(tree_uuid: str):
diff --git a/website/web/templates/hostname_popup.html b/website/web/templates/hostname_popup.html
index 74884a6..9e4d517 100644
--- a/website/web/templates/hostname_popup.html
+++ b/website/web/templates/hostname_popup.html
@@ -170,19 +170,6 @@
{% endif %}
- {% if url['sane_js'] %}
-
- {% if url['sane_js'] is string %}
- {{ url['sane_js'] }}
- {% else %}
- This file is known as part of {{ url['sane_js'][0] }}
- version {{ url['sane_js'][1] }}: {{ url['sane_js'][2] }}.
- {% if url['sane_js'][3] > 1%}
- It is also present in {{ url['sane_js'][3] -1 }} other libraries.
- {%endif%}
- {%endif%}
-
- {% endif %}
{% if url['cookies_received'] %}