chg: Use capture UUID as a reference everywhere

pull/79/head
Raphaël Vinot 2020-06-29 11:59:01 +02:00
parent fa935a6773
commit 05de56022f
11 changed files with 221 additions and 192 deletions

bin/rebuild_caches.py Executable file

@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import logging
+
+from lookyloo.lookyloo import Lookyloo
+logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
+                    level=logging.INFO, datefmt='%I:%M:%S')
+
+if __name__ == '__main__':
+    lookyloo = Lookyloo()
+    remove_pickles = input('Do you want to remove the pickles? Rebuilding will take a very long time. (y/N)')
+    if remove_pickles == 'y':
+        lookyloo.rebuild_all()
+    else:
+        lookyloo.rebuild_cache()
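For scripted maintenance, the two entry points behind this prompt can also be driven directly. A minimal sketch, assuming a fully configured Lookyloo instance (method behaviour taken from the lookyloo.py changes below):

    from lookyloo.lookyloo import Lookyloo

    lookyloo = Lookyloo()

    # Cheap rebuild: flush the Redis cache and re-index every capture
    # directory, keeping the existing tree.pickle files.
    lookyloo.rebuild_cache()

    # Full rebuild: drop every cached tree pickle first, so each tree is
    # re-parsed from its HAR files on next access (very slow).
    lookyloo.rebuild_all()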


@@ -13,6 +13,8 @@ from glob import glob
 import json
 import traceback
 from urllib.parse import urlparse
+import pickle
+from har2tree import CrawledTree
 from bs4 import BeautifulSoup  # type: ignore
 try:
@@ -210,3 +212,17 @@ def load_cookies(cookie_pseudofile: Optional[BufferedIOBase]=None) -> List[Dict[
     except Exception as e:
         print(f'Unable to load the cookie file: {e}')
     return to_return
+
+
+def load_pickle_tree(capture_dir: Path) -> Optional[CrawledTree]:
+    pickle_file = capture_dir / 'tree.pickle'
+    if pickle_file.exists():
+        with pickle_file.open('rb') as _p:
+            return pickle.load(_p)
+    return None
+
+
+def remove_pickle_tree(capture_dir: Path) -> None:
+    pickle_file = capture_dir / 'tree.pickle'
+    if pickle_file.exists():
+        pickle_file.unlink()
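These two helpers define the on-disk cache contract: tree.pickle inside a capture directory either deserializes to a CrawledTree or is absent. A minimal sketch of the round trip, with a hypothetical capture directory:

    from pathlib import Path

    from lookyloo.helpers import load_pickle_tree, remove_pickle_tree

    capture_dir = Path('scraped/2020-06-29_120000')  # hypothetical capture directory

    ct = load_pickle_tree(capture_dir)  # returns None when tree.pickle is absent
    if ct is None:
        # Callers rebuild the tree and pickle it back (see Lookyloo.cache_tree below).
        pass

    remove_pickle_tree(capture_dir)  # idempotent: only unlinks the pickle if present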


@@ -19,6 +19,7 @@ from urllib.parse import urlsplit
 from uuid import uuid4
 from zipfile import ZipFile
 import publicsuffix2  # type: ignore
+from defang import refang  # type: ignore
 from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
 from redis import Redis
@@ -27,7 +28,7 @@ from scrapysplashwrapper import crawl
 from werkzeug.useragents import UserAgent
 from .exceptions import NoValidHarFile, MissingUUID
-from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template
+from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree
 from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
@@ -97,17 +98,60 @@ class Lookyloo():
         with self_generated_ua_file.open('w') as f:
             json.dump(to_store, f, indent=2)

+    def cache_tree(self, capture_uuid) -> None:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        with open((capture_dir / 'uuid'), 'r') as f:
+            uuid = f.read()
+        har_files = sorted(capture_dir.glob('*.har'))
+        try:
+            ct = CrawledTree(har_files, uuid)
+        except Har2TreeError as e:
+            raise NoValidHarFile(e.message)
+        with (capture_dir / 'tree.pickle').open('wb') as _p:
+            pickle.dump(ct, _p)
+
+    def get_crawled_tree(self, capture_uuid: str) -> CrawledTree:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
+        if not ct:
+            self.cache_tree(capture_uuid)
+            ct = load_pickle_tree(capture_dir)
+        if not ct:
+            raise NoValidHarFile(f'Unable to get tree from {capture_dir}')
+        return ct
+
+    def load_tree(self, capture_uuid: str) -> Tuple[str, str, str, str, Dict[str, str]]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        meta = {}
+        if (capture_dir / 'meta').exists():
+            with open((capture_dir / 'meta'), 'r') as f:
+                meta = json.load(f)
+        ct = self.get_crawled_tree(capture_uuid)
+        return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
+
+    def remove_pickle(self, capture_uuid: str) -> None:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        remove_pickle_tree(capture_dir)
+
     def rebuild_cache(self) -> None:
         self.redis.flushdb()
         self._init_existing_dumps()

-    def remove_pickle(self, capture_dir: Path) -> None:
-        if (capture_dir / 'tree.pickle').exists():
-            (capture_dir / 'tree.pickle').unlink()
-
     def rebuild_all(self) -> None:
         for capture_dir in self.capture_dirs:
-            self.remove_pickle(capture_dir)
+            remove_pickle_tree(capture_dir)
         self.rebuild_cache()

     def get_config(self, entry: str) -> Any:
@@ -124,29 +168,39 @@ class Lookyloo():
             sample_config = json.load(_c)
         return sample_config[entry]

-    def get_urlnode_from_tree(self, capture_dir: Path, node_uuid: str) -> URLNode:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_urlnode_from_tree(self, capture_uuid: str, node_uuid: str) -> URLNode:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
         return ct.root_hartree.get_url_node_by_uuid(node_uuid)

-    def get_hostnode_from_tree(self, capture_dir: Path, node_uuid: str) -> HostNode:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_hostnode_from_tree(self, capture_uuid: str, node_uuid: str) -> HostNode:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
         return ct.root_hartree.get_host_node_by_uuid(node_uuid)

-    def get_statistics(self, capture_dir: Path) -> Dict[str, Any]:
-        # We need the pickle
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_statistics(self, capture_uuid: str) -> Dict[str, Any]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_dir}) is cached.')
             return {}
         return ct.root_hartree.stats

-    def trigger_modules(self, capture_dir: Path, force: bool=False) -> None:
-        # We need the pickle
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def trigger_modules(self, capture_uuid: str, force: bool=False) -> None:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_dir}) is cached.')
             return
@@ -165,8 +219,11 @@ class Lookyloo():
         else:
             self.vt.url_lookup(ct.root_hartree.har.root_url, force)

-    def get_modules_responses(self, capture_dir: Path) -> Optional[Dict[str, Any]]:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_modules_responses(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             self.logger.warning(f'Unable to get the modules responses unless the tree ({capture_dir}) is cached.')
             return None
@@ -219,7 +276,7 @@ class Lookyloo():
         incomplete_redirects = False
         if redirects and har.need_tree_redirects:
             # load tree from disk, get redirects
-            ct = self._load_pickle(capture_dir / 'tree.pickle')
+            ct = load_pickle_tree(capture_dir)
             if ct:
                 redirects = ct.redirects
             else:
@@ -231,6 +288,7 @@ class Lookyloo():
                  'timestamp': har.initial_start_time,
                  'url': har.root_url,
                  'redirects': json.dumps(redirects),
+                 'capture_dir': str(capture_dir),
                  'incomplete_redirects': 1 if incomplete_redirects else 0}
         if (capture_dir / 'no_index').exists():  # If the folders claims anonymity
             cache['no_index'] = 1
@@ -238,19 +296,27 @@ class Lookyloo():
         self.redis.hmset(str(capture_dir), cache)
         self.redis.hset('lookup_dirs', uuid, str(capture_dir))

-    def capture_cache(self, capture_dir: Path) -> Optional[Dict[str, Any]]:
+    @property
+    def capture_uuids(self):
+        return self.redis.hkeys('lookup_dirs')
+
+    def capture_cache(self, capture_uuid: str) -> Dict[str, Any]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
         if self.redis.hget(str(capture_dir), 'incomplete_redirects') == '1':
             # try to rebuild the cache
             self._set_capture_cache(capture_dir, force=True)
         cached = self.redis.hgetall(str(capture_dir))
-        if all(key in cached.keys() for key in ['uuid', 'title', 'timestamp', 'url', 'redirects']):
+        if all(key in cached.keys() for key in ['uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir']):
             cached['redirects'] = json.loads(cached['redirects'])
+            cached['capture_dir'] = Path(cached['capture_dir'])
             return cached
         elif 'error' in cached:
             return cached
         else:
             self.logger.warning(f'Cache ({capture_dir}) is invalid: {json.dumps(cached, indent=2)}')
-            return None
+            return {}

     def _init_existing_dumps(self) -> None:
         for capture_dir in self.capture_dirs:
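With 'capture_dir' now stored in the per-capture hash and 'lookup_dirs' mapping UUIDs to directories, the cache can be inspected straight from Redis. An illustrative sketch, assuming redis-py with decoded responses, an illustrative socket path, and a placeholder UUID:

    import json

    from redis import Redis

    r = Redis(unix_socket_path='cache.sock', decode_responses=True)  # illustrative socket path

    capture_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder
    capture_dir = r.hget('lookup_dirs', capture_uuid)  # UUID -> capture directory, or None
    if capture_dir:
        cache = r.hgetall(capture_dir)  # per-capture hash, keyed by directory path
        redirects = json.loads(cache['redirects'])  # redirects are stored as a JSON string
        print(cache['uuid'], cache['title'], cache['capture_dir'])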
@@ -270,8 +336,8 @@ class Lookyloo():
                 f.write(str(uuid4()))
         return sorted(self.scrape_dir.iterdir(), reverse=True)

-    def lookup_capture_dir(self, uuid: str) -> Union[Path, None]:
-        capture_dir = self.redis.hget('lookup_dirs', uuid)
+    def lookup_capture_dir(self, capture_uuid: str) -> Union[Path, None]:
+        capture_dir = self.redis.hget('lookup_dirs', capture_uuid)
         if capture_dir:
             return Path(capture_dir)
         return None
@@ -300,28 +366,20 @@ class Lookyloo():
             return True
         return False

-    def _load_pickle(self, pickle_file: Path) -> Optional[CrawledTree]:
-        if pickle_file.exists():
-            with pickle_file.open('rb') as _p:
-                return pickle.load(_p)
-        return None
-
     def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
         if not self.get_config('enable_mail_notification'):
             return
         redirects = ''
         initial_url = ''
-        capture_dir = self.lookup_capture_dir(capture_uuid)
-        if capture_dir:
-            cache = self.capture_cache(capture_dir)
-            if cache:
-                initial_url = cache['url']
-                if 'redirects' in cache and cache['redirects']:
-                    redirects = "Redirects:\n"
-                    redirects += '\n'.join(cache['redirects'])
-                else:
-                    redirects = "No redirects."
+        cache = self.capture_cache(capture_uuid)
+        if cache:
+            initial_url = cache['url']
+            if 'redirects' in cache and cache['redirects']:
+                redirects = "Redirects:\n"
+                redirects += '\n'.join(cache['redirects'])
+            else:
+                redirects = "No redirects."

         email_config = self.get_config('email')
         msg = EmailMessage()
@@ -371,31 +429,10 @@ class Lookyloo():
         with metafile.open('w') as f:
             json.dump(to_dump, f)

-    def get_crawled_tree(self, capture_dir: Path) -> CrawledTree:
-        pickle_file = capture_dir / 'tree.pickle'
-        ct = self._load_pickle(pickle_file)
-        if not ct:
-            with open((capture_dir / 'uuid'), 'r') as f:
-                uuid = f.read()
-            har_files = sorted(capture_dir.glob('*.har'))
-            try:
-                ct = CrawledTree(har_files, uuid)
-                self._ensure_meta(capture_dir, ct)
-            except Har2TreeError as e:
-                raise NoValidHarFile(e.message)
-            with pickle_file.open('wb') as _p:
-                pickle.dump(ct, _p)
-        return ct
-
-    def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
-        meta = {}
-        if (capture_dir / 'meta').exists():
-            with open((capture_dir / 'meta'), 'r') as f:
-                meta = json.load(f)
-        ct = self.get_crawled_tree(capture_dir)
-        return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
-
-    def _get_raw(self, capture_dir: Path, extension: str='*', all_files: bool=True) -> BytesIO:
+    def _get_raw(self, capture_uuid: str, extension: str='*', all_files: bool=True) -> BytesIO:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
         all_paths = sorted(list(capture_dir.glob(f'*.{extension}')))
         if not all_files:
             # Only get the first one in the list
@@ -410,17 +447,17 @@ class Lookyloo():
         to_return.seek(0)
         return to_return

-    def get_html(self, capture_dir: Path, all_html: bool=False) -> BytesIO:
-        return self._get_raw(capture_dir, 'html', all_html)
+    def get_html(self, capture_uuid: str, all_html: bool=False) -> BytesIO:
+        return self._get_raw(capture_uuid, 'html', all_html)

-    def get_cookies(self, capture_dir: Path, all_cookies: bool=False) -> BytesIO:
-        return self._get_raw(capture_dir, 'cookies.json', all_cookies)
+    def get_cookies(self, capture_uuid: str, all_cookies: bool=False) -> BytesIO:
+        return self._get_raw(capture_uuid, 'cookies.json', all_cookies)

-    def get_screenshot(self, capture_dir: Path, all_images: bool=False) -> BytesIO:
-        return self._get_raw(capture_dir, 'png', all_images)
+    def get_screenshot(self, capture_uuid: str, all_images: bool=False) -> BytesIO:
+        return self._get_raw(capture_uuid, 'png', all_images)

-    def get_capture(self, capture_dir: Path) -> BytesIO:
-        return self._get_raw(capture_dir)
+    def get_capture(self, capture_uuid: str) -> BytesIO:
+        return self._get_raw(capture_uuid)

     def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
                depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
@@ -505,8 +542,12 @@ class Lookyloo():
         self._set_capture_cache(dirpath)
         return perma_uuid

-    def get_hostnode_investigator(self, capture_dir: Path, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
-        ct = self._load_pickle(capture_dir / 'tree.pickle')
+    def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
+        capture_dir = self.lookup_capture_dir(capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find {capture_uuid}')
+        ct = load_pickle_tree(capture_dir)
         if not ct:
             raise MissingUUID(f'Unable to find {capture_dir}')
         hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
@@ -536,16 +577,17 @@ class Lookyloo():
             else:
                 to_append['url_path_short'] = to_append['url_path']

-            # Optional: SaneJS information
-            if hasattr(url, 'body_hash') and url.body_hash in sanejs_lookups:
-                if sanejs_lookups[url.body_hash]:
-                    if isinstance(sanejs_lookups[url.body_hash], list):
-                        libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
-                        other_files = len(sanejs_lookups[url.body_hash])
-                        to_append['sane_js'] = (libname, version, path, other_files)
-                    else:
-                        # Predefined generic file
-                        to_append['sane_js'] = sanejs_lookups[url.body_hash]
+            if not url.empty_response:
+                # Optional: SaneJS information
+                if url.body_hash in sanejs_lookups:
+                    if sanejs_lookups[url.body_hash]:
+                        if isinstance(sanejs_lookups[url.body_hash], list):
+                            libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
+                            other_files = len(sanejs_lookups[url.body_hash])
+                            to_append['sane_js'] = (libname, version, path, other_files)
+                        else:
+                            # Predefined generic file
+                            to_append['sane_js'] = sanejs_lookups[url.body_hash]

             # Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
             if hasattr(url, 'cookies_sent'):
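Taken together, the methods above make the public Lookyloo API UUID-first: callers never handle capture directories, and an unknown UUID raises MissingUUID. A sketch of the new call pattern, based only on the signatures in this diff:

    from lookyloo.lookyloo import Lookyloo
    from lookyloo.exceptions import MissingUUID, NoValidHarFile

    lookyloo = Lookyloo()

    for capture_uuid in lookyloo.capture_uuids:  # new property, read straight from Redis
        try:
            cache = lookyloo.capture_cache(capture_uuid)  # raises MissingUUID if unknown
            ct = lookyloo.get_crawled_tree(capture_uuid)  # builds and pickles the tree on first use
            screenshot = lookyloo.get_screenshot(capture_uuid)  # BytesIO, resolved via the UUID
        except MissingUUID:
            continue  # the capture may still be ongoing
        except NoValidHarFile:
            continue  # nothing usable in the capture directory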


@@ -52,10 +52,16 @@ class SaneJavaScript():
"71db01662075fac031dea18b2c766826c77dbab01400a8642cdc7059394841d5df9020076554c3beca6f808187d42e1a1acc98fad9a0e1ad32ae869145f53746": "This is a 1*1 pixel GIF",
"49b8daf1f5ba868bc8c6b224c787a75025ca36513ef8633d1d8f34e48ee0b578f466fcc104a7bed553404ddc5f9faff3fef5f894b31cd57f32245e550fad656a": "This is a 1*1 pixel GIF",
"c57ebbadcf59f982ba28da35fdbd5e5369a8500a2e1edad0dc9c9174de6fd99f437953732e545b95d3de5943c61077b6b949c989f49553ff2e483f68fcc30641": "This is a 1*1 pixel GIF",
"c87bf81fd70cf6434ca3a6c05ad6e9bd3f1d96f77dddad8d45ee043b126b2cb07a5cf23b4137b9d8462cd8a9adf2b463ab6de2b38c93db72d2d511ca60e3b57e": "This is a 1*1 pixel GIF",
"fd8b021f0236e487bfee13bf8f0ae98760abc492f7ca3023e292631979e135cb4ccb0c89b6234971b060ad72c0ca4474cbb5092c6c7a3255d81a54a36277b486": "This is a 1*1 pixel GIF",
"235479f42cbbe0a4b0100167fece0d14c9b47d272b3ba8322bcfe8539f055bf31d500e7b2995cc968ebf73034e039f59c5f0f9410428663034bf119d74b5672c": "This is a 1*1 pixel GIF",
"a85e09c3b5dbb560f4e03ba880047dbc8b4999a64c1f54fbfbca17ee0bcbed3bc6708d699190b56668e464a59358d6b534c3963a1329ba01db21075ef5bedace": "This is a 1*1 pixel GIF",
"27656d6106a6da0c84174ba7a6307e6f1c4b3f2cc085c8466b6a25d54331035dabc7081aac208d960d8d37c5577547628c0d1c4b77bb4cf254c71859673feec1": "This is a 1*1 pixel GIF",
# "": "This is a 1*1 pixel GIF",
"f1c33e72643ce366fd578e3b5d393799e8c9ea27b180987826af43b4fc00b65a4eaae5e6426a23448956fee99e3108c6a86f32fb4896c156e24af0571a11c498": "This is a 1*1 pixel PNG",
"dc7c40381b3d22919e32c1b700ccb77b1b0aea2690642d01c1ac802561e135c01d5a4d2a0ea18efc0ec3362e8c549814a10a23563f1f56bd62aee0ced7e2bd99": "This is a 1*1 pixel PNG",
"c2c239cb5cdd0b670780ad6414ef6be9ccd4c21ce46bb93d1fa3120ac812f1679445162978c3df05cb2e1582a1844cc4c41cf74960b8fdae3123999c5d2176cc": "This is a 1*1 pixel PNG",
"6ad523f5b65487369d305613366b9f68dcdeee225291766e3b25faf45439ca069f614030c08ca54c714fdbf7a944fac489b1515a8bf9e0d3191e1bcbbfe6a9df": "This is a 1*1 pixel PNG",
# "": "This is a 1*1 pixel PNG",
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e": "This is an empty file"
}


@@ -26,6 +26,7 @@ run_backend = "bin/run_backend.py"
 async_scrape = "bin/async_scrape.py"
 shutdown = "bin/shutdown.py"
 stop = "bin/stop.py"
+rebuild_caches = "bin/rebuild_caches.py"

 [tool.poetry.dependencies]


@@ -13,7 +13,7 @@ setup(
     description='Web interface to track the trackers.',
     packages=['lookyloo'],
     scripts=['bin/start_website.py', 'bin/start.py', 'bin/run_backend.py', 'bin/async_scrape.py',
-             'bin/shutdown.py', 'bin/stop.py'],
+             'bin/shutdown.py', 'bin/stop.py', 'bin/rebuild_caches.py'],
     include_package_data=True,
     classifiers=[
         'License :: OSI Approved :: BSD License',


@@ -11,7 +11,7 @@ FileSaver="v2.0.2"
 wget -q https://raw.githubusercontent.com/eligrey/FileSaver.js/${FileSaver}/src/FileSaver.js -O web/static/FileSaver.js

-datatables="1.10.20"
+datatables="1.10.21"
 wget -q https://cdn.datatables.net/v/bs4/dt-${datatables}/datatables.min.css -O web/static/datatables.min.css
 wget -q https://cdn.datatables.net/v/bs4/dt-${datatables}/datatables.min.js -O web/static/datatables.min.js


@@ -14,7 +14,7 @@ from flask_httpauth import HTTPDigestAuth  # type: ignore
 from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
 from lookyloo.lookyloo import Lookyloo
-from lookyloo.exceptions import NoValidHarFile
+from lookyloo.exceptions import NoValidHarFile, MissingUUID
 from .proxied import ReverseProxied
 from typing import Optional, Dict, Any
@@ -96,11 +96,11 @@ def rebuild_cache():
 @app.route('/tree/<string:tree_uuid>/rebuild')
 @auth.login_required
 def rebuild_tree(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if capture_dir:
-        lookyloo.remove_pickle(capture_dir)
+    try:
+        lookyloo.remove_pickle(tree_uuid)
         return redirect(url_for('tree', tree_uuid=tree_uuid))
-    return redirect(url_for('index'))
+    except Exception:
+        return redirect(url_for('index'))

 @app.route('/submit', methods=['POST', 'GET'])
@@ -140,10 +140,7 @@ def scrape_web():

 @app.route('/tree/<string:tree_uuid>/hostname/<string:node_uuid>/text', methods=['GET'])
 def hostnode_details_text(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
+    hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
     urls = []
     for url in hostnode.urls:
         urls.append(url.name)
@@ -159,10 +156,6 @@ def hostnode_details_text(tree_uuid: str, node_uuid: str):

 @app.route('/tree/<string:tree_uuid>/hostname_popup/<string:node_uuid>', methods=['GET'])
 def hostnode_popup(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
     keys_response = {
         'js': "/static/javascript.png",
         'exe': "/static/exe.png",
@@ -182,7 +175,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
         'request_cookie': "/static/cookie_read.png",
     }

-    hostnode, urls = lookyloo.get_hostnode_investigator(capture_dir, node_uuid)
+    hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid)

     return render_template('hostname_popup.html',
                            tree_uuid=tree_uuid,
@@ -195,10 +188,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):

 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET'])
 def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.request_cookie:
         return
@@ -208,10 +198,7 @@ def urlnode_request_cookies(tree_uuid: str, node_uuid: str):

 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET'])
 def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.response_cookie:
         return
@@ -221,10 +208,7 @@ def urlnode_response_cookies(tree_uuid: str, node_uuid: str):

 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
 def urlnode_post_request(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.posted_data:
         return
     if isinstance(urlnode.posted_data, (dict, list)):
@@ -244,10 +228,7 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):

 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>', methods=['GET'])
 def urlnode_details(tree_uuid: str, node_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return
-    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     to_return = BytesIO()
     got_content = False
     if hasattr(urlnode, 'body'):
@@ -267,28 +248,19 @@ def urlnode_details(tree_uuid: str, node_uuid: str):

 @app.route('/tree/<string:tree_uuid>/trigger_modules/', defaults={'force': False})
 @app.route('/tree/<string:tree_uuid>/trigger_modules/<int:force>', methods=['GET'])
 def trigger_modules(tree_uuid: str, force: int):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    lookyloo.trigger_modules(capture_dir, True if force else False)
+    lookyloo.trigger_modules(tree_uuid, True if force else False)
     return redirect(url_for('modules', tree_uuid=tree_uuid))

 @app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
 def stats(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    stats = lookyloo.get_statistics(capture_dir)
+    stats = lookyloo.get_statistics(tree_uuid)
     return render_template('statistics.html', uuid=tree_uuid, stats=stats)

 @app.route('/tree/<string:tree_uuid>/modules', methods=['GET'])
 def modules(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    modules_responses = lookyloo.get_modules_responses(capture_dir)
+    modules_responses = lookyloo.get_modules_responses(tree_uuid)
     if not modules_responses:
         return redirect(url_for('tree', tree_uuid=tree_uuid))
@@ -319,50 +291,35 @@ def modules(tree_uuid: str):

 @app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
 def image(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_screenshot(capture_dir)
+    to_return = lookyloo.get_screenshot(tree_uuid)
     return send_file(to_return, mimetype='image/png',
                      as_attachment=True, attachment_filename='image.png')

 @app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
 def html(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_html(capture_dir)
+    to_return = lookyloo.get_html(tree_uuid)
     return send_file(to_return, mimetype='text/html',
                      as_attachment=True, attachment_filename='page.html')

 @app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
 def cookies(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_cookies(capture_dir)
+    to_return = lookyloo.get_cookies(tree_uuid)
     return send_file(to_return, mimetype='application/json',
                      as_attachment=True, attachment_filename='cookies.json')

 @app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
 def export(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    to_return = lookyloo.get_capture(capture_dir)
+    to_return = lookyloo.get_capture(tree_uuid)
     return send_file(to_return, mimetype='application/zip',
                      as_attachment=True, attachment_filename='capture.zip')

 @app.route('/redirects/<string:tree_uuid>', methods=['GET'])
 def redirects(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return Response('Not available.', mimetype='text/text')
-    cache = lookyloo.capture_cache(capture_dir)
+    cache = lookyloo.capture_cache(tree_uuid)
     if not cache:
         return Response('Not available.', mimetype='text/text')
     if not cache['redirects']:
@@ -374,9 +331,7 @@ def redirects(tree_uuid: str):

 @app.route('/cache_tree/<string:tree_uuid>', methods=['GET'])
 def cache_tree(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if capture_dir:
-        lookyloo.load_tree(capture_dir)
+    lookyloo.cache_tree(tree_uuid)
     return redirect(url_for('index'))
@@ -389,16 +344,17 @@ def send_mail(tree_uuid: str):

 @app.route('/tree/<string:tree_uuid>', methods=['GET'])
-def tree(tree_uuid: str):
+@app.route('/tree/<string:tree_uuid>/<string:urlnode_uuid>', methods=['GET'])
+def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
     if tree_uuid == 'False':
         flash("Unable to process your request. The domain may not exist, or splash isn't started", 'error')
         return redirect(url_for('index'))
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
+    try:
+        cache = lookyloo.capture_cache(tree_uuid)
+    except MissingUUID:
         flash(f'Unable to find this UUID ({tree_uuid}). The capture may still be ongoing, try again later.', 'error')
         return redirect(url_for('index'))
-    cache = lookyloo.capture_cache(capture_dir)
     if not cache:
         flash('Invalid cache.', 'error')
         return redirect(url_for('index'))
@@ -412,10 +368,12 @@
             enable_mail_notification = True
         else:
             enable_mail_notification = False
-        tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(capture_dir)
+        tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(tree_uuid)
         return render_template('tree.html', tree_json=tree_json, start_time=start_time,
                                user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
-                               meta=meta, enable_mail_notification=enable_mail_notification)
+                               meta=meta, enable_mail_notification=enable_mail_notification,
+                               urlnode_uuid=urlnode_uuid)
     except NoValidHarFile as e:
         return render_template('error.html', error_message=e)
@@ -427,8 +385,8 @@ def index_generic(show_hidden: bool=False):
         cut_time = datetime.now() - timedelta(**time_delta_on_index)
     else:
         cut_time = None  # type: ignore
-    for capture_dir in lookyloo.capture_dirs:
-        cached = lookyloo.capture_cache(capture_dir)
+    for capture_uuid in lookyloo.capture_uuids:
+        cached = lookyloo.capture_cache(capture_uuid)
         if not cached or 'error' in cached:
             continue
         if show_hidden:
@@ -459,15 +417,12 @@ def index():
 def index_hidden():
     return index_generic(show_hidden=True)

 # Query API

 @app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
 def json_redirects(tree_uuid: str):
-    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
-    if not capture_dir:
-        return {'error': 'Unknown UUID, try again later.'}
-    cache = lookyloo.capture_cache(capture_dir)
+    cache = lookyloo.capture_cache(tree_uuid)
     if not cache:
         return {'error': 'UUID missing in cache, try again later.'}
@@ -477,8 +432,8 @@ def json_redirects(tree_uuid: str):
         return to_return
     if cache['incomplete_redirects']:
         # Trigger tree build, get all redirects
-        lookyloo.load_tree(capture_dir)
-        cache = lookyloo.capture_cache(capture_dir)
+        lookyloo.load_tree(tree_uuid)
+        cache = lookyloo.capture_cache(tree_uuid)
         if cache:
             to_return['response']['redirects'] = cache['redirects']
     else:
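Every route above follows the same pattern: hand tree_uuid to Lookyloo and let MissingUUID signal an unknown capture. A sketch of what an additional endpoint could look like under this convention (the /har route, its use of _get_raw, and the filename are hypothetical, not part of this commit):

    @app.route('/tree/<string:tree_uuid>/har', methods=['GET'])  # hypothetical endpoint
    def har(tree_uuid: str):
        # No lookup_capture_dir() boilerplate: the UUID is resolved inside Lookyloo.
        try:
            to_return = lookyloo._get_raw(tree_uuid, 'har', all_files=False)
        except MissingUUID:
            return Response('Not available.', mimetype='text/text')
        return send_file(to_return, mimetype='application/json',
                         as_attachment=True, attachment_filename='capture.har')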


@@ -77,30 +77,15 @@ function urlnode_click(d) {
     });
 };

-d3.selection.prototype.moveToFront = function() {
-    return this.each(function() {
-        this.parentNode.appendChild(this);
-    });
-};
-
-d3.selection.prototype.moveToBack = function() {
-    return this.each(function() {
-        var firstChild = this.parentNode.firstChild;
-        if (firstChild) {
-            this.parentNode.insertBefore(this, firstChild);
-        }
-    });
-};
-
 function hostnode_click_popup(d) {
     window.open('/tree/' + treeUUID + '/hostname_popup/' + d.data.uuid, '_blank', 'width=1024,height=768,left=200,top=100');
 };

-function ProcessChildMessage(message) {
-    var element = document.getElementById("node_" + message);
+function ProcessChildMessage(urlnode_uuid) {
+    var element = document.getElementById("node_" + urlnode_uuid);
     element.scrollIntoView({behavior: "smooth", block: "center", inline: "nearest"});

-    var to_blink = d3.select("#node_" + message).select('text');
+    var to_blink = d3.select("#node_" + urlnode_uuid).select('text');
     to_blink
       .transition().duration(500) //Set transition
       .style('fill', 'red')


@@ -105,15 +105,15 @@
             {% if url['sane_js'] %}
             <div>
-              {% if url['sane_js'] is string %}
-                {{ url['sane_js'] }}
-              {% else %}
-                This file is known as part of <b>{{ url['sane_js'][0] }}</b>
-                version <b>{{ url['sane_js'][1] }}</b>: <b>{{ url['sane_js'][2] }}</b>.
-                {% if url['sane_js'][3] > 1%}
+              {% if url['sane_js'] is string %}
+                <b>{{ url['sane_js'] }} </b>
+              {% else %}
+                This file is known as part of <b>{{ url['sane_js'][0] }}</b>
+                version <b>{{ url['sane_js'][1] }}</b>: <b>{{ url['sane_js'][2] }}</b>.
+                {% if url['sane_js'][3] > 1%}
                   It is also present in <b>{{ url['sane_js'][3] -1 }}</b> other libraries.
-                {%endif%}
+                  {%endif%}
               {%endif%}
             </div>
             {% endif %}


@@ -35,6 +35,14 @@
     });
   });
 </script>
+{% if urlnode_uuid %}
+<script>
+  history.scrollRestoration = "manual";
+  window.addEventListener('DOMContentLoaded', (event) => {
+    ProcessChildMessage('{{urlnode_uuid}}');
+  });
+</script>
+{% endif%}
 {% endblock %}

 {% block content %}