diff --git a/bin/rebuild_caches.py b/bin/rebuild_caches.py index 027481e..060488f 100755 --- a/bin/rebuild_caches.py +++ b/bin/rebuild_caches.py @@ -4,6 +4,7 @@ import argparse import logging +from lookyloo.helpers import load_pickle_tree from lookyloo.lookyloo import Lookyloo, Indexing logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', @@ -23,5 +24,6 @@ if __name__ == '__main__': indexing = Indexing() indexing.clear_indexes() for capture_dir in lookyloo.capture_dirs: - indexing.index_cookies_capture(capture_dir) - indexing.index_body_hashes_capture(capture_dir) + tree = load_pickle_tree(capture_dir) + indexing.index_cookies_capture(tree) + indexing.index_body_hashes_capture(tree) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index b38c90d..4e0bb0d 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -58,17 +58,7 @@ class Indexing(): def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]: return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')] - def index_cookies_capture(self, capture_dir: Path) -> None: - print(f'Index cookies {capture_dir}') - try: - crawled_tree = load_pickle_tree(capture_dir) - except Exception as e: - print(e) - return - - if not crawled_tree: - return - + def index_cookies_capture(self, crawled_tree: CrawledTree) -> None: if self.redis.sismember('indexed_cookies', crawled_tree.uuid): # Do not reindex return @@ -115,17 +105,7 @@ class Indexing(): return {'hash_freq': self.redis.zscore('body_hashes', body_hash), 'hash_domains_freq': self.redis.zcard(f'bh|{body_hash}')} - def index_body_hashes_capture(self, capture_dir: Path) -> None: - print(f'Index body hashes {capture_dir}') - try: - crawled_tree = load_pickle_tree(capture_dir) - except Exception as e: - print(e) - return - - if not crawled_tree: - return - + def index_body_hashes_capture(self, crawled_tree: CrawledTree) -> None: if self.redis.sismember('indexed_body_hashes', crawled_tree.uuid): # Do not reindex return @@ -239,6 +219,8 @@ class Lookyloo(): har_files = sorted(capture_dir.glob('*.har')) try: ct = CrawledTree(har_files, uuid) + self.indexing.index_cookies_capture(ct) + self.indexing.index_body_hashes_capture(ct) except Har2TreeError as e: raise NoValidHarFile(e.message) diff --git a/website/web/__init__.py b/website/web/__init__.py index 28ac601..814a93f 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -439,8 +439,8 @@ def body_hash_details(body_hash: str): captures, domains = lookyloo.get_body_hash_investigator(body_hash) return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures) -# Query API +# Query API @app.route('/json//redirects', methods=['GET']) def json_redirects(tree_uuid: str): diff --git a/website/web/templates/hostname_popup.html b/website/web/templates/hostname_popup.html index 74884a6..9e4d517 100644 --- a/website/web/templates/hostname_popup.html +++ b/website/web/templates/hostname_popup.html @@ -170,19 +170,6 @@

{% endif %} - {% if url['sane_js'] %} -
- {% if url['sane_js'] is string %} - {{ url['sane_js'] }} - {% else %} - This file is known as part of {{ url['sane_js'][0] }} - version {{ url['sane_js'][1] }}: {{ url['sane_js'][2] }}. - {% if url['sane_js'][3] > 1%} - It is also present in {{ url['sane_js'][3] -1 }} other libraries. - {%endif%} - {%endif%} -
- {% endif %} {% if url['cookies_received'] %}