diff --git a/bin/rebuild_caches.py b/bin/rebuild_caches.py
index 027481e..060488f 100755
--- a/bin/rebuild_caches.py
+++ b/bin/rebuild_caches.py
@@ -4,6 +4,7 @@
import argparse
import logging
+from lookyloo.helpers import load_pickle_tree
from lookyloo.lookyloo import Lookyloo, Indexing
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
@@ -23,5 +24,15 @@ if __name__ == '__main__':
     indexing = Indexing()
     indexing.clear_indexes()
     for capture_dir in lookyloo.capture_dirs:
-        indexing.index_cookies_capture(capture_dir)
-        indexing.index_body_hashes_capture(capture_dir)
+        # The indexers now take an already-loaded tree, so the loading
+        # failures they used to swallow must be handled here: skip the
+        # capture instead of aborting the whole rebuild.
+        try:
+            tree = load_pickle_tree(capture_dir)
+        except Exception as e:
+            print(f'Unable to load the tree of {capture_dir}: {e}')
+            continue
+        if not tree:
+            continue
+        indexing.index_cookies_capture(tree)
+        indexing.index_body_hashes_capture(tree)
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index b38c90d..4e0bb0d 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -58,17 +58,7 @@ class Indexing():
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
- def index_cookies_capture(self, capture_dir: Path) -> None:
- print(f'Index cookies {capture_dir}')
- try:
- crawled_tree = load_pickle_tree(capture_dir)
- except Exception as e:
- print(e)
- return
-
- if not crawled_tree:
- return
-
+ def index_cookies_capture(self, crawled_tree: CrawledTree) -> None:
if self.redis.sismember('indexed_cookies', crawled_tree.uuid):
# Do not reindex
return
@@ -115,17 +105,7 @@ class Indexing():
return {'hash_freq': self.redis.zscore('body_hashes', body_hash),
'hash_domains_freq': self.redis.zcard(f'bh|{body_hash}')}
- def index_body_hashes_capture(self, capture_dir: Path) -> None:
- print(f'Index body hashes {capture_dir}')
- try:
- crawled_tree = load_pickle_tree(capture_dir)
- except Exception as e:
- print(e)
- return
-
- if not crawled_tree:
- return
-
+ def index_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
if self.redis.sismember('indexed_body_hashes', crawled_tree.uuid):
# Do not reindex
return
@@ -239,6 +219,8 @@ class Lookyloo():
har_files = sorted(capture_dir.glob('*.har'))
try:
ct = CrawledTree(har_files, uuid)
+ self.indexing.index_cookies_capture(ct)
+ self.indexing.index_body_hashes_capture(ct)
except Har2TreeError as e:
raise NoValidHarFile(e.message)
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 28ac601..814a93f 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -439,8 +439,8 @@ def body_hash_details(body_hash: str):
captures, domains = lookyloo.get_body_hash_investigator(body_hash)
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures)
-# Query API
+# Query API
@app.route('/json/