diff --git a/bin/rebuild_caches.py b/bin/rebuild_caches.py
new file mode 100755
index 0000000..53ad52b
--- /dev/null
+++ b/bin/rebuild_caches.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import logging
+from lookyloo.lookyloo import Lookyloo
+
+logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
+                    level=logging.INFO, datefmt='%I:%M:%S')
+
+if __name__ == '__main__':
+    lookyloo = Lookyloo()
+    remove_pickles = input('Do you want to remove the pickles? Rebuilding will take a very long time. (y/N) ')
+    if remove_pickles == 'y':
+        lookyloo.rebuild_all()
+    else:
+        lookyloo.rebuild_cache()
diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py
index 1894624..b1a221e 100644
--- a/lookyloo/helpers.py
+++ b/lookyloo/helpers.py
@@ -13,6 +13,8 @@ from glob import glob
 import json
 import traceback
 from urllib.parse import urlparse
+import pickle
+from har2tree import CrawledTree
 
 from bs4 import BeautifulSoup  # type: ignore
 try:
@@ -210,3 +212,17 @@ def load_cookies(cookie_pseudofile: Optional[BufferedIOBase]=None) -> List[Dict[
     except Exception as e:
         print(f'Unable to load the cookie file: {e}')
     return to_return
+
+
+def load_pickle_tree(capture_dir: Path) -> Optional[CrawledTree]:
+    pickle_file = capture_dir / 'tree.pickle'
+    if pickle_file.exists():
+        with pickle_file.open('rb') as _p:
+            return pickle.load(_p)
+    return None
+
+
+def remove_pickle_tree(capture_dir: Path) -> None:
+    pickle_file = capture_dir / 'tree.pickle'
+    if pickle_file.exists():
+        pickle_file.unlink()
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index e0af08d..ba8855e 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -19,6 +19,7 @@ from urllib.parse import urlsplit
 from uuid import uuid4
 from zipfile import ZipFile
 
+import publicsuffix2  # type: ignore
 from defang import refang  # type: ignore
 from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
 from redis import Redis
@@ -27,7 +28,7 @@ from scrapysplashwrapper import crawl
 from werkzeug.useragents import UserAgent
 
 from .exceptions import NoValidHarFile, MissingUUID
-from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template
+from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree
 from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
 
 
@@ -97,17 +98,60 @@ class Lookyloo():
         with self_generated_ua_file.open('w') as f:
             json.dump(to_store, f, indent=2)
 
+    def cache_tree(self, capture_uuid: str) -> None:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+
+        with open((capture_dir / 'uuid'), 'r') as f:
+            uuid = f.read()
+        har_files = sorted(capture_dir.glob('*.har'))
+        try:
+            ct = CrawledTree(har_files, uuid)
+        except Har2TreeError as e:
+            raise NoValidHarFile(e.message)
+
+        with (capture_dir / 'tree.pickle').open('wb') as _p:
+            pickle.dump(ct, _p)
+
+    def get_crawled_tree(self, capture_uuid: str) -> CrawledTree:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
+        if not ct:
+            self.cache_tree(capture_uuid)
+            ct = load_pickle_tree(capture_dir)
+
+        if not ct:
+            raise NoValidHarFile(f'Unable to get tree from {capture_dir}')
+
+        return ct
+
+    def load_tree(self, capture_uuid: str) -> Tuple[str, str, str, str, Dict[str, str]]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        meta = {}
+        if (capture_dir / 'meta').exists():
+            with open((capture_dir / 'meta'), 'r') as f:
+                meta = json.load(f)
+        ct = self.get_crawled_tree(capture_uuid)
+        return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
+
+    def remove_pickle(self, capture_uuid: str) -> None:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        remove_pickle_tree(capture_dir)
+
     def rebuild_cache(self) -> None:
         self.redis.flushdb()
         self._init_existing_dumps()
 
-    def remove_pickle(self, capture_dir: Path) -> None:
-        if (capture_dir / 'tree.pickle').exists():
-            (capture_dir / 'tree.pickle').unlink()
-
     def rebuild_all(self) -> None:
         for capture_dir in self.capture_dirs:
-            self.remove_pickle(capture_dir)
+            remove_pickle_tree(capture_dir)
         self.rebuild_cache()
 
     def get_config(self, entry: str) -> Any:
@@ -124,29 +168,39 @@
             sample_config = json.load(_c)
         return sample_config[entry]
 
-    def get_urlnode_from_tree(self, capture_dir: Path, node_uuid: str) -> URLNode:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_urlnode_from_tree(self, capture_uuid: str, node_uuid: str) -> URLNode:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
         return ct.root_hartree.get_url_node_by_uuid(node_uuid)
 
-    def get_hostnode_from_tree(self, capture_dir: Path, node_uuid: str) -> HostNode:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_hostnode_from_tree(self, capture_uuid: str, node_uuid: str) -> HostNode:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
         return ct.root_hartree.get_host_node_by_uuid(node_uuid)
 
-    def get_statistics(self, capture_dir: Path) -> Dict[str, Any]:
-        # We need the pickle
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_statistics(self, capture_uuid: str) -> Dict[str, Any]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_dir}) is cached.')
             return {}
         return ct.root_hartree.stats
 
-    def trigger_modules(self, capture_dir: Path, force: bool=False) -> None:
-        # We need the pickle
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def trigger_modules(self, capture_uuid: str, force: bool=False) -> None:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_dir}) is cached.')
             return
@@ -165,8 +219,11 @@ class Lookyloo():
         else:
             self.vt.url_lookup(ct.root_hartree.har.root_url, force)
 
-    def get_modules_responses(self, capture_dir: Path) -> Optional[Dict[str, Any]]:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_modules_responses(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             self.logger.warning(f'Unable to get the modules responses unless the tree ({capture_dir}) is cached.')
             return None
@@ -219,7 +276,7 @@ class Lookyloo():
         incomplete_redirects = False
         if redirects and har.need_tree_redirects:
             # load tree from disk, get redirects
-            ct = self._load_pickle(capture_dir / 'tree.pickle')
+            ct = load_pickle_tree(capture_dir)
             if ct:
                 redirects = ct.redirects
             else:
@@ -231,6 +288,7 @@ class Lookyloo():
             'timestamp': har.initial_start_time,
             'url': har.root_url,
             'redirects': json.dumps(redirects),
+            'capture_dir': str(capture_dir),
             'incomplete_redirects': 1 if incomplete_redirects else 0}
         if (capture_dir / 'no_index').exists():  # If the folders claims anonymity
             cache['no_index'] = 1
@@ -238,19 +296,27 @@ class Lookyloo():
         self.redis.hmset(str(capture_dir), cache)
         self.redis.hset('lookup_dirs', uuid, str(capture_dir))
 
-    def capture_cache(self, capture_dir: Path) -> Optional[Dict[str, Any]]:
+    @property
+    def capture_uuids(self):
+        return self.redis.hkeys('lookup_dirs')
+
+    def capture_cache(self, capture_uuid: str) -> Dict[str, Any]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
         if self.redis.hget(str(capture_dir), 'incomplete_redirects') == '1':
             # try to rebuild the cache
             self._set_capture_cache(capture_dir, force=True)
         cached = self.redis.hgetall(str(capture_dir))
-        if all(key in cached.keys() for key in ['uuid', 'title', 'timestamp', 'url', 'redirects']):
+        if all(key in cached.keys() for key in ['uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir']):
             cached['redirects'] = json.loads(cached['redirects'])
+            cached['capture_dir'] = Path(cached['capture_dir'])
             return cached
         elif 'error' in cached:
             return cached
         else:
             self.logger.warning(f'Cache ({capture_dir}) is invalid: {json.dumps(cached, indent=2)}')
-            return None
+            return {}
 
     def _init_existing_dumps(self) -> None:
         for capture_dir in self.capture_dirs:
@@ -270,8 +336,8 @@ class Lookyloo():
                     f.write(str(uuid4()))
         return sorted(self.scrape_dir.iterdir(), reverse=True)
 
-    def lookup_capture_dir(self, uuid: str) -> Union[Path, None]:
-        capture_dir = self.redis.hget('lookup_dirs', uuid)
+    def lookup_capture_dir(self, capture_uuid: str) -> Union[Path, None]:
+        capture_dir = self.redis.hget('lookup_dirs', capture_uuid)
         if capture_dir:
             return Path(capture_dir)
         return None
@@ -300,28 +366,20 @@ class Lookyloo():
                 return True
         return False
 
-    def _load_pickle(self, pickle_file: Path) -> Optional[CrawledTree]:
-        if pickle_file.exists():
-            with pickle_file.open('rb') as _p:
-                return pickle.load(_p)
-        return None
-
     def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
         if not self.get_config('enable_mail_notification'):
             return
         redirects = ''
         initial_url = ''
-        capture_dir = self.lookup_capture_dir(capture_uuid)
-        if capture_dir:
-            cache = self.capture_cache(capture_dir)
-            if cache:
-                initial_url = cache['url']
-                if 'redirects' in cache and cache['redirects']:
-                    redirects = "Redirects:\n"
-                    redirects += '\n'.join(cache['redirects'])
-                else:
-                    redirects = "No redirects."
+        cache = self.capture_cache(capture_uuid)
+        if cache:
+            initial_url = cache['url']
+            if 'redirects' in cache and cache['redirects']:
+                redirects = "Redirects:\n"
+                redirects += '\n'.join(cache['redirects'])
+            else:
+                redirects = "No redirects."
 
         email_config = self.get_config('email')
         msg = EmailMessage()
@@ -371,31 +429,10 @@ class Lookyloo():
             with metafile.open('w') as f:
                 json.dump(to_dump, f)
 
-    def get_crawled_tree(self, capture_dir: Path) -> CrawledTree:
-        pickle_file = capture_dir / 'tree.pickle'
-        ct = self._load_pickle(pickle_file)
-        if not ct:
-            with open((capture_dir / 'uuid'), 'r') as f:
-                uuid = f.read()
-            har_files = sorted(capture_dir.glob('*.har'))
-            try:
-                ct = CrawledTree(har_files, uuid)
-                self._ensure_meta(capture_dir, ct)
-            except Har2TreeError as e:
-                raise NoValidHarFile(e.message)
-            with pickle_file.open('wb') as _p:
-                pickle.dump(ct, _p)
-        return ct
-
-    def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
-        meta = {}
-        if (capture_dir / 'meta').exists():
-            with open((capture_dir / 'meta'), 'r') as f:
-                meta = json.load(f)
-        ct = self.get_crawled_tree(capture_dir)
-        return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
-
-    def _get_raw(self, capture_dir: Path, extension: str='*', all_files: bool=True) -> BytesIO:
+    def _get_raw(self, capture_uuid: str, extension: str='*', all_files: bool=True) -> BytesIO:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
         all_paths = sorted(list(capture_dir.glob(f'*.{extension}')))
         if not all_files:
             # Only get the first one in the list
@@ -410,17 +447,17 @@ class Lookyloo():
         to_return.seek(0)
         return to_return
 
-    def get_html(self, capture_dir: Path, all_html: bool=False) -> BytesIO:
-        return self._get_raw(capture_dir, 'html', all_html)
+    def get_html(self, capture_uuid: str, all_html: bool=False) -> BytesIO:
+        return self._get_raw(capture_uuid, 'html', all_html)
 
-    def get_cookies(self, capture_dir: Path, all_cookies: bool=False) -> BytesIO:
-        return self._get_raw(capture_dir, 'cookies.json', all_cookies)
+    def get_cookies(self, capture_uuid: str, all_cookies: bool=False) -> BytesIO:
+        return self._get_raw(capture_uuid, 'cookies.json', all_cookies)
 
-    def get_screenshot(self, capture_dir: Path, all_images: bool=False) -> BytesIO:
-        return self._get_raw(capture_dir, 'png', all_images)
+    def get_screenshot(self, capture_uuid: str, all_images: bool=False) -> BytesIO:
+        return self._get_raw(capture_uuid, 'png', all_images)
 
-    def get_capture(self, capture_dir: Path) -> BytesIO:
-        return self._get_raw(capture_dir)
+    def get_capture(self, capture_uuid: str) -> BytesIO:
+        return self._get_raw(capture_uuid)
 
     def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
                depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
@@ -505,8 +542,12 @@ class Lookyloo():
             self._set_capture_cache(dirpath)
         return perma_uuid
 
-    def get_hostnode_investigator(self, capture_dir: Path, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find {capture_uuid}')
+
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             raise MissingUUID(f'Unable to find {capture_dir}')
         hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
@@ -536,16 +577,17 @@ class Lookyloo():
             else:
                 to_append['url_path_short'] = to_append['url_path']
 
-            # Optional: SaneJS information
-            if hasattr(url, 'body_hash') and url.body_hash in sanejs_lookups:
-                if sanejs_lookups[url.body_hash]:
-                    if isinstance(sanejs_lookups[url.body_hash], list):
-                        libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
-                        other_files = len(sanejs_lookups[url.body_hash])
-                        to_append['sane_js'] = (libname, version, path, other_files)
-                    else:
-                        # Predefined generic file
-                        to_append['sane_js'] = sanejs_lookups[url.body_hash]
+            if not url.empty_response:
+                # Optional: SaneJS information
+                if url.body_hash in sanejs_lookups:
+                    if sanejs_lookups[url.body_hash]:
+                        if isinstance(sanejs_lookups[url.body_hash], list):
+                            libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
+                            other_files = len(sanejs_lookups[url.body_hash])
+                            to_append['sane_js'] = (libname, version, path, other_files)
+                        else:
+                            # Predefined generic file
+                            to_append['sane_js'] = sanejs_lookups[url.body_hash]
 
             # Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
             if hasattr(url, 'cookies_sent'):
diff --git a/lookyloo/modules.py b/lookyloo/modules.py
index 1f2da80..a14ee57 100644
--- a/lookyloo/modules.py
+++ b/lookyloo/modules.py
@@ -52,10 +52,16 @@ class SaneJavaScript():
         "71db01662075fac031dea18b2c766826c77dbab01400a8642cdc7059394841d5df9020076554c3beca6f808187d42e1a1acc98fad9a0e1ad32ae869145f53746": "This is a 1*1 pixel GIF",
         "49b8daf1f5ba868bc8c6b224c787a75025ca36513ef8633d1d8f34e48ee0b578f466fcc104a7bed553404ddc5f9faff3fef5f894b31cd57f32245e550fad656a": "This is a 1*1 pixel GIF",
         "c57ebbadcf59f982ba28da35fdbd5e5369a8500a2e1edad0dc9c9174de6fd99f437953732e545b95d3de5943c61077b6b949c989f49553ff2e483f68fcc30641": "This is a 1*1 pixel GIF",
+        "c87bf81fd70cf6434ca3a6c05ad6e9bd3f1d96f77dddad8d45ee043b126b2cb07a5cf23b4137b9d8462cd8a9adf2b463ab6de2b38c93db72d2d511ca60e3b57e": "This is a 1*1 pixel GIF",
+        "fd8b021f0236e487bfee13bf8f0ae98760abc492f7ca3023e292631979e135cb4ccb0c89b6234971b060ad72c0ca4474cbb5092c6c7a3255d81a54a36277b486": "This is a 1*1 pixel GIF",
+        "235479f42cbbe0a4b0100167fece0d14c9b47d272b3ba8322bcfe8539f055bf31d500e7b2995cc968ebf73034e039f59c5f0f9410428663034bf119d74b5672c": "This is a 1*1 pixel GIF",
+        "a85e09c3b5dbb560f4e03ba880047dbc8b4999a64c1f54fbfbca17ee0bcbed3bc6708d699190b56668e464a59358d6b534c3963a1329ba01db21075ef5bedace": "This is a 1*1 pixel GIF",
+        "27656d6106a6da0c84174ba7a6307e6f1c4b3f2cc085c8466b6a25d54331035dabc7081aac208d960d8d37c5577547628c0d1c4b77bb4cf254c71859673feec1": "This is a 1*1 pixel GIF",
         # "": "This is a 1*1 pixel GIF",
         "f1c33e72643ce366fd578e3b5d393799e8c9ea27b180987826af43b4fc00b65a4eaae5e6426a23448956fee99e3108c6a86f32fb4896c156e24af0571a11c498": "This is a 1*1 pixel PNG",
         "dc7c40381b3d22919e32c1b700ccb77b1b0aea2690642d01c1ac802561e135c01d5a4d2a0ea18efc0ec3362e8c549814a10a23563f1f56bd62aee0ced7e2bd99": "This is a 1*1 pixel PNG",
         "c2c239cb5cdd0b670780ad6414ef6be9ccd4c21ce46bb93d1fa3120ac812f1679445162978c3df05cb2e1582a1844cc4c41cf74960b8fdae3123999c5d2176cc": "This is a 1*1 pixel PNG",
+        "6ad523f5b65487369d305613366b9f68dcdeee225291766e3b25faf45439ca069f614030c08ca54c714fdbf7a944fac489b1515a8bf9e0d3191e1bcbbfe6a9df": "This is a 1*1 pixel PNG",
         # "": "This is a 1*1 pixel PNG",
         "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e": "This is an empty file"
     }
diff --git a/pyproject.toml b/pyproject.toml
index 17f60c8..9434e09 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ run_backend = "bin/run_backend.py"
 async_scrape = "bin/async_scrape.py"
 shutdown = "bin/shutdown.py"
 stop = "bin/stop.py"
+rebuild_caches = "bin/rebuild_caches.py"
 
 
 [tool.poetry.dependencies]
diff --git a/setup.py b/setup.py
index ffc06c1..9fd8dd2 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ setup(
     description='Web interface to track the trackers.',
     packages=['lookyloo'],
     scripts=['bin/start_website.py', 'bin/start.py', 'bin/run_backend.py', 'bin/async_scrape.py',
-             'bin/shutdown.py', 'bin/stop.py'],
+             'bin/shutdown.py', 'bin/stop.py', 'bin/rebuild_caches.py'],
     include_package_data=True,
     classifiers=[
         'License :: OSI Approved :: BSD License',
diff --git a/website/3rdparty.sh b/website/3rdparty.sh
index c9d8494..69a88cf 100755
--- a/website/3rdparty.sh
+++ b/website/3rdparty.sh
@@ -11,7 +11,7 @@ FileSaver="v2.0.2"
 
 wget -q https://raw.githubusercontent.com/eligrey/FileSaver.js/${FileSaver}/src/FileSaver.js -O web/static/FileSaver.js
 
-datatables="1.10.20"
+datatables="1.10.21"
 
 wget -q https://cdn.datatables.net/v/bs4/dt-${datatables}/datatables.min.css -O web/static/datatables.min.css
 wget -q https://cdn.datatables.net/v/bs4/dt-${datatables}/datatables.min.js -O web/static/datatables.min.js
diff --git a/website/web/__init__.py b/website/web/__init__.py
index a3938db..b4cfdf5 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -14,7 +14,7 @@ from flask_httpauth import HTTPDigestAuth  # type: ignore
 
 from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
 from lookyloo.lookyloo import Lookyloo
-from lookyloo.exceptions import NoValidHarFile
+from lookyloo.exceptions import NoValidHarFile, MissingUUID
 from .proxied import ReverseProxied
 
 from typing import Optional, Dict, Any
@@ -96,11 +96,11 @@ def rebuild_cache():
 @app.route('/tree/<tree_uuid>/rebuild')
 @auth.login_required
 def rebuild_tree(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if capture_dir:
-        lookyloo.remove_pickle(capture_dir)
+    try:
+        lookyloo.remove_pickle(tree_uuid)
         return redirect(url_for('tree', tree_uuid=tree_uuid))
-    return redirect(url_for('index'))
+    except Exception:
+        return redirect(url_for('index'))
 
 
 @app.route('/submit', methods=['POST', 'GET'])
@@ -140,10 +140,7 @@ def scrape_web():
 
 @app.route('/tree/<tree_uuid>/hostname/<node_uuid>/text', methods=['GET'])
 def hostnode_details_text(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
+    hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
     urls = []
     for url in hostnode.urls:
         urls.append(url.name)
@@ -159,10 +156,6 @@ def hostnode_details_text(tree_uuid: str, node_uuid: str):
 
 @app.route('/tree/<tree_uuid>/hostname_popup/<node_uuid>', methods=['GET'])
 def hostnode_popup(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-
     keys_response = {
         'js': "/static/javascript.png",
         'exe': "/static/exe.png",
@@ -182,7 +175,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
         'request_cookie': "/static/cookie_read.png",
     }
 
-    hostnode, urls = lookyloo.get_hostnode_investigator(capture_dir, node_uuid)
+    hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid)
 
     return render_template('hostname_popup.html',
                            tree_uuid=tree_uuid,
@@ -195,10 +188,7 @@
 
 @app.route('/tree/<tree_uuid>/url/<node_uuid>/request_cookies', methods=['GET'])
 def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.request_cookie:
         return
 
@@ -208,10 +198,7 @@ def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
 
 @app.route('/tree/<tree_uuid>/url/<node_uuid>/response_cookies', methods=['GET'])
 def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.response_cookie:
         return
 
@@ -221,10 +208,7 @@ def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
 
 @app.route('/tree/<tree_uuid>/url/<node_uuid>/posted_data', methods=['GET'])
 def urlnode_post_request(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.posted_data:
        return
    if isinstance(urlnode.posted_data, (dict, list)):
@@ -244,10 +228,7 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
 
 @app.route('/tree/<tree_uuid>/url/<node_uuid>', methods=['GET'])
 def urlnode_details(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     to_return = BytesIO()
     got_content = False
     if hasattr(urlnode, 'body'):
@@ -267,28 +248,19 @@ def urlnode_details(tree_uuid: str, node_uuid: str):
 
 @app.route('/tree/<tree_uuid>/trigger_modules/', defaults={'force': False})
 @app.route('/tree/<tree_uuid>/trigger_modules/<int:force>', methods=['GET'])
 def trigger_modules(tree_uuid: str, force: int):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    lookyloo.trigger_modules(capture_dir, True if force else False)
+    lookyloo.trigger_modules(tree_uuid, True if force else False)
     return redirect(url_for('modules', tree_uuid=tree_uuid))
 
 
 @app.route('/tree/<tree_uuid>/stats', methods=['GET'])
 def stats(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    stats = lookyloo.get_statistics(capture_dir)
+    stats = lookyloo.get_statistics(tree_uuid)
     return render_template('statistics.html', uuid=tree_uuid, stats=stats)
 
 
 @app.route('/tree/<tree_uuid>/modules', methods=['GET'])
 def modules(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    modules_responses = lookyloo.get_modules_responses(capture_dir)
+    modules_responses = lookyloo.get_modules_responses(tree_uuid)
     if not modules_responses:
         return redirect(url_for('tree', tree_uuid=tree_uuid))
 
@@ -319,50 +291,35 @@ def modules(tree_uuid: str):
 
 @app.route('/tree/<tree_uuid>/image', methods=['GET'])
 def image(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_screenshot(capture_dir)
+    to_return = lookyloo.get_screenshot(tree_uuid)
     return send_file(to_return, mimetype='image/png',
                      as_attachment=True, attachment_filename='image.png')
 
 
 @app.route('/tree/<tree_uuid>/html', methods=['GET'])
 def html(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_html(capture_dir)
+    to_return = lookyloo.get_html(tree_uuid)
     return send_file(to_return, mimetype='text/html',
                      as_attachment=True, attachment_filename='page.html')
 
 
 @app.route('/tree/<tree_uuid>/cookies', methods=['GET'])
 def cookies(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_cookies(capture_dir)
+    to_return = lookyloo.get_cookies(tree_uuid)
     return send_file(to_return, mimetype='application/json',
                      as_attachment=True, attachment_filename='cookies.json')
 
 
 @app.route('/tree/<tree_uuid>/export', methods=['GET'])
 def export(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_capture(capture_dir)
+    to_return = lookyloo.get_capture(tree_uuid)
     return send_file(to_return, mimetype='application/zip',
                      as_attachment=True, attachment_filename='capture.zip')
 
 
 @app.route('/redirects/<tree_uuid>', methods=['GET'])
 def redirects(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    cache = lookyloo.capture_cache(capture_dir)
+    cache = lookyloo.capture_cache(tree_uuid)
     if not cache:
         return Response('Not available.', mimetype='text/text')
     if not cache['redirects']:
@@ -374,9 +331,7 @@ def redirects(tree_uuid: str):
 
 @app.route('/cache_tree/<tree_uuid>', methods=['GET'])
 def cache_tree(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if capture_dir:
-        lookyloo.load_tree(capture_dir)
+    lookyloo.cache_tree(tree_uuid)
     return redirect(url_for('index'))
 
 
@@ -389,16 +344,17 @@ def send_mail(tree_uuid: str):
 
 
 @app.route('/tree/<tree_uuid>', methods=['GET'])
-def tree(tree_uuid: str):
+@app.route('/tree/<tree_uuid>/<urlnode_uuid>', methods=['GET'])
+def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
     if tree_uuid == 'False':
         flash("Unable to process your request. The domain may not exist, or splash isn't started", 'error')
         return redirect(url_for('index'))
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
+    try:
+        cache = lookyloo.capture_cache(tree_uuid)
+    except MissingUUID:
         flash(f'Unable to find this UUID ({tree_uuid}). The capture may still be ongoing, try again later.', 'error')
         return redirect(url_for('index'))
-    cache = lookyloo.capture_cache(capture_dir)
     if not cache:
         flash('Invalid cache.', 'error')
         return redirect(url_for('index'))
@@ -412,10 +368,12 @@ def tree(tree_uuid: str):
             enable_mail_notification = True
         else:
             enable_mail_notification = False
-        tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(capture_dir)
+        tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(tree_uuid)
         return render_template('tree.html', tree_json=tree_json, start_time=start_time,
                                user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
-                               meta=meta, enable_mail_notification=enable_mail_notification)
+                               meta=meta, enable_mail_notification=enable_mail_notification,
+                               urlnode_uuid=urlnode_uuid)
+
     except NoValidHarFile as e:
         return render_template('error.html', error_message=e)
@@ -427,8 +385,8 @@ def index_generic(show_hidden: bool=False):
         cut_time = datetime.now() - timedelta(**time_delta_on_index)
     else:
         cut_time = None  # type: ignore
-    for capture_dir in lookyloo.capture_dirs:
-        cached = lookyloo.capture_cache(capture_dir)
+    for capture_uuid in lookyloo.capture_uuids:
+        cached = lookyloo.capture_cache(capture_uuid)
         if not cached or 'error' in cached:
             continue
         if show_hidden:
@@ -459,15 +417,12 @@ def index():
 def index_hidden():
     return index_generic(show_hidden=True)
 
-# Query API
+
 @app.route('/json/<tree_uuid>/redirects', methods=['GET'])
 def json_redirects(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return {'error': 'Unknown UUID, try again later.'}
-    cache = lookyloo.capture_cache(capture_dir)
+    cache = lookyloo.capture_cache(tree_uuid)
     if not cache:
         return {'error': 'UUID missing in cache, try again later.'}
@@ -477,8 +432,8 @@ def json_redirects(tree_uuid: str):
         return to_return
     if cache['incomplete_redirects']:
         # Trigger tree build, get all redirects
-        lookyloo.load_tree(capture_dir)
-        cache = lookyloo.capture_cache(capture_dir)
+        lookyloo.load_tree(tree_uuid)
+        cache = lookyloo.capture_cache(tree_uuid)
         if cache:
             to_return['response']['redirects'] = cache['redirects']
         else:
diff --git a/website/web/static/tree.js b/website/web/static/tree.js
index d237c93..7d4d3de 100644
--- a/website/web/static/tree.js
+++ b/website/web/static/tree.js
@@ -77,30 +77,15 @@ function urlnode_click(d) {
     });
 };
 
-d3.selection.prototype.moveToFront = function() {
-    return this.each(function() {
-        this.parentNode.appendChild(this);
-    });
-};
-
-d3.selection.prototype.moveToBack = function() {
-    return this.each(function() {
-        var firstChild = this.parentNode.firstChild;
-        if (firstChild) {
-            this.parentNode.insertBefore(this, firstChild);
-        }
-    });
-};
-
 function hostnode_click_popup(d) {
     window.open('/tree/' + treeUUID + '/hostname_popup/' + d.data.uuid,
                 '_blank', 'width=1024,height=768,left=200,top=100');
 };
 
-function ProcessChildMessage(message) {
-    var element = document.getElementById("node_" + message);
+function ProcessChildMessage(urlnode_uuid) {
+    var element = document.getElementById("node_" + urlnode_uuid);
     element.scrollIntoView({behavior: "smooth", block: "center", inline: "nearest"});
 
-    var to_blink = d3.select("#node_" + message).select('text');
+    var to_blink = d3.select("#node_" + urlnode_uuid).select('text');
     to_blink
       .transition().duration(500) //Set transition
       .style('fill', 'red')
diff --git a/website/web/templates/hostname_popup.html b/website/web/templates/hostname_popup.html
index ba91a6f..66398c1 100644
--- a/website/web/templates/hostname_popup.html
+++ b/website/web/templates/hostname_popup.html
@@ -105,15 +105,15 @@
           {% if url['sane_js'] %}
           <div>
-          {% if url['sane_js'] is string %}
-          {{ url['sane_js'] }}
-          {% else %}
-          This file is known as part of {{ url['sane_js'][0] }}
-          version {{ url['sane_js'][1] }}: {{ url['sane_js'][2] }}.
-          {% if url['sane_js'][3] > 1%}
+            {% if url['sane_js'] is string %}
+            {{ url['sane_js'] }}
+            {% else %}
+            This file is known as part of {{ url['sane_js'][0] }}
+            version {{ url['sane_js'][1] }}: {{ url['sane_js'][2] }}.
+            {% if url['sane_js'][3] > 1%}
             It is also present in {{ url['sane_js'][3] -1 }} other libraries.
-          {%endif%}
             {%endif%}
+            {%endif%}
           </div>
          {% endif %}
diff --git a/website/web/templates/tree.html b/website/web/templates/tree.html
index b35ca40..48e9fda 100644
--- a/website/web/templates/tree.html
+++ b/website/web/templates/tree.html
@@ -35,6 +35,14 @@
       });
     });
   </script>
+  {% if urlnode_uuid %}
+  <script>
+    $(document).ready(function() {
+      ProcessChildMessage("{{ urlnode_uuid }}");
+    });
+  </script>
+  {% endif%}
+
 {% endblock %}
 
 {% block content %}
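Review note: a minimal sketch of how the UUID-first API introduced by this patch is meant to be driven, assuming a running Redis backend and at least one indexed capture. The `capture_uuids` property, `load_tree()`, and the two exceptions come from this diff; the walking script itself is hypothetical and not part of the patch.

```python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Hypothetical driver (not part of this patch): iterates over cached
# captures by UUID instead of by directory, exercising the refactored API.
from lookyloo.lookyloo import Lookyloo
from lookyloo.exceptions import MissingUUID, NoValidHarFile

lookyloo = Lookyloo()

# capture_uuids is the new property backed by HKEYS on the 'lookup_dirs'
# Redis hash; lookup_capture_dir() resolves each UUID to its directory.
for capture_uuid in lookyloo.capture_uuids:
    try:
        # Builds and pickles the tree on first access (via cache_tree),
        # then reuses tree.pickle on subsequent calls.
        tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(capture_uuid)
        print(capture_uuid, start_time, root_url)
    except MissingUUID:
        # UUID vanished from the cache (e.g. flushed by rebuild_cache()).
        continue
    except NoValidHarFile:
        # Capture directory contains no usable HAR files.
        continue
```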