mirror of https://github.com/CIRCL/lookyloo
chg: Use capture UUID as a reference everywhere
parent
fa935a6773
commit
05de56022f
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from lookyloo.lookyloo import Lookyloo
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
if __name__ == '__main__':
    # Maintenance entry point: either wipe every pickled tree and rebuild
    # everything (rebuild_all), or only rebuild the cache (rebuild_cache).
    lookyloo = Lookyloo()
    remove_pickles = input('Do you want to remove the pickles? Rebuilding will take a very long time. (y/N)')
    # Normalise the answer so that "Y", "yes" or stray whitespace are not
    # silently interpreted as "no". Anything else keeps the pickles.
    if remove_pickles.strip().lower() in ('y', 'yes'):
        lookyloo.rebuild_all()
    else:
        lookyloo.rebuild_cache()
|
|
@ -13,6 +13,8 @@ from glob import glob
|
|||
import json
|
||||
import traceback
|
||||
from urllib.parse import urlparse
|
||||
import pickle
|
||||
from har2tree import CrawledTree
|
||||
|
||||
from bs4 import BeautifulSoup # type: ignore
|
||||
try:
|
||||
|
@ -210,3 +212,17 @@ def load_cookies(cookie_pseudofile: Optional[BufferedIOBase]=None) -> List[Dict[
|
|||
except Exception as e:
|
||||
print(f'Unable to load the cookie file: {e}')
|
||||
return to_return
|
||||
|
||||
|
||||
def load_pickle_tree(capture_dir: Path) -> Optional['CrawledTree']:
    """Load the pickled tree stored as ``tree.pickle`` in *capture_dir*.

    :param capture_dir: directory of the capture.
    :return: the unpickled object, or ``None`` when the pickle is missing,
             empty or truncated, so callers can treat all of these as a
             cache miss.

    NOTE: unpickling can execute arbitrary code; only use this on capture
    directories written by the application itself.
    """
    pickle_file = capture_dir / 'tree.pickle'
    try:
        # Open directly instead of checking exists() first: the file may be
        # removed by another process between the check and the open.
        with pickle_file.open('rb') as _p:
            return pickle.load(_p)
    except (FileNotFoundError, NotADirectoryError):
        # No pickle (or no capture directory) yet.
        return None
    except (pickle.UnpicklingError, EOFError):
        # Empty or truncated pickle (e.g. interrupted write): unusable.
        return None
|
||||
|
||||
|
||||
def remove_pickle_tree(capture_dir: Path) -> None:
    """Delete ``tree.pickle`` from *capture_dir*.

    A missing pickle is not an error: the function is a no-op in that case.

    :param capture_dir: directory of the capture.
    """
    pickle_file = capture_dir / 'tree.pickle'
    try:
        # Unlink directly rather than checking exists() first, so a
        # concurrent removal cannot make this raise.
        pickle_file.unlink()
    except (FileNotFoundError, NotADirectoryError):
        pass
|
||||
|
|
|
@ -19,6 +19,7 @@ from urllib.parse import urlsplit
|
|||
from uuid import uuid4
|
||||
from zipfile import ZipFile
|
||||
|
||||
import publicsuffix2 # type: ignore
|
||||
from defang import refang # type: ignore
|
||||
from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
|
||||
from redis import Redis
|
||||
|
@ -27,7 +28,7 @@ from scrapysplashwrapper import crawl
|
|||
from werkzeug.useragents import UserAgent
|
||||
|
||||
from .exceptions import NoValidHarFile, MissingUUID
|
||||
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template
|
||||
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template, load_pickle_tree, remove_pickle_tree
|
||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
|
||||
|
||||
|
||||
|
@ -97,17 +98,60 @@ class Lookyloo():
|
|||
with self_generated_ua_file.open('w') as f:
|
||||
json.dump(to_store, f, indent=2)
|
||||
|
||||
def cache_tree(self, capture_uuid: str) -> None:
    """Build the tree of a capture from its HAR files and pickle it.

    The tree is written to ``tree.pickle`` inside the capture directory.

    :param capture_uuid: UUID of the capture, resolved to a directory with
                         ``lookup_capture_dir``.
    :raises MissingUUID: if no directory is known for *capture_uuid*.
    :raises NoValidHarFile: if ``CrawledTree`` raises ``Har2TreeError``.
    """
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')

    with (capture_dir / 'uuid').open('r') as f:
        # Strip so a trailing newline in the file never ends up in the UUID.
        uuid = f.read().strip()
    har_files = sorted(capture_dir.glob('*.har'))
    try:
        ct = CrawledTree(har_files, uuid)
    except Har2TreeError as e:
        # Chain the original error so the root cause stays in the traceback.
        raise NoValidHarFile(e.message) from e

    with (capture_dir / 'tree.pickle').open('wb') as _p:
        pickle.dump(ct, _p)
|
||||
|
||||
def get_crawled_tree(self, capture_uuid: str) -> CrawledTree:
    """Return the tree of a capture, building and pickling it on a miss.

    :param capture_uuid: UUID of the capture.
    :raises MissingUUID: if no directory is known for *capture_uuid*.
    :raises NoValidHarFile: if no tree can be loaded even after
                            ``cache_tree`` ran.
    """
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')

    tree = load_pickle_tree(capture_dir)
    if tree:
        return tree

    # Nothing usable on disk: build the pickle, then read it back.
    self.cache_tree(capture_uuid)
    tree = load_pickle_tree(capture_dir)
    if tree:
        return tree
    raise NoValidHarFile(f'Unable to get tree from {capture_dir}')
|
||||
|
||||
def load_tree(self, capture_uuid: str) -> Tuple[str, str, str, str, Dict[str, str]]:
    """Collect everything needed to display the tree of a capture.

    :param capture_uuid: UUID of the capture.
    :return: ``(tree as JSON, start time in ISO format, user agent,
             root URL, content of the 'meta' file or {} if there is none)``.
    :raises MissingUUID: if no directory is known for *capture_uuid*.
    """
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')

    # The meta file is optional; it is read before the tree is loaded.
    meta_file = capture_dir / 'meta'
    if meta_file.exists():
        with meta_file.open('r') as meta_handle:
            meta = json.load(meta_handle)
    else:
        meta = {}

    tree = self.get_crawled_tree(capture_uuid)
    return (tree.to_json(), tree.start_time.isoformat(),
            tree.user_agent, tree.root_url, meta)
|
||||
|
||||
def remove_pickle(self, capture_uuid: str) -> None:
    """Remove the pickled tree of the capture identified by *capture_uuid*.

    :raises MissingUUID: if no directory is known for *capture_uuid*.
    """
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if capture_dir:
        remove_pickle_tree(capture_dir)
        return
    raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
|
||||
def rebuild_cache(self) -> None:
    """Flush the Redis database, then re-run ``_init_existing_dumps()``."""
    # NOTE(review): flushdb() presumably drops every key of the current
    # Redis database, not only the ones written by this class - confirm
    # the database is dedicated to this application.
    self.redis.flushdb()
    self._init_existing_dumps()
|
||||
|
||||
def remove_pickle(self, capture_dir: Path) -> None:
|
||||
if (capture_dir / 'tree.pickle').exists():
|
||||
(capture_dir / 'tree.pickle').unlink()
|
||||
|
||||
def rebuild_all(self) -> None:
|
||||
for capture_dir in self.capture_dirs:
|
||||
self.remove_pickle(capture_dir)
|
||||
remove_pickle_tree(capture_dir)
|
||||
self.rebuild_cache()
|
||||
|
||||
def get_config(self, entry: str) -> Any:
|
||||
|
@ -124,29 +168,39 @@ class Lookyloo():
|
|||
sample_config = json.load(_c)
|
||||
return sample_config[entry]
|
||||
|
||||
def get_urlnode_from_tree(self, capture_dir: Path, node_uuid: str) -> URLNode:
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
def get_urlnode_from_tree(self, capture_uuid: str, node_uuid: str) -> URLNode:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
ct = load_pickle_tree(capture_dir)
|
||||
if not ct:
|
||||
raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
|
||||
return ct.root_hartree.get_url_node_by_uuid(node_uuid)
|
||||
|
||||
def get_hostnode_from_tree(self, capture_dir: Path, node_uuid: str) -> HostNode:
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
def get_hostnode_from_tree(self, capture_uuid: str, node_uuid: str) -> HostNode:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
ct = load_pickle_tree(capture_dir)
|
||||
if not ct:
|
||||
raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
|
||||
return ct.root_hartree.get_host_node_by_uuid(node_uuid)
|
||||
|
||||
def get_statistics(self, capture_dir: Path) -> Dict[str, Any]:
|
||||
# We need the pickle
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
def get_statistics(self, capture_uuid: str) -> Dict[str, Any]:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
ct = load_pickle_tree(capture_dir)
|
||||
if not ct:
|
||||
self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_dir}) is cached.')
|
||||
return {}
|
||||
return ct.root_hartree.stats
|
||||
|
||||
def trigger_modules(self, capture_dir: Path, force: bool=False) -> None:
|
||||
# We need the pickle
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
def trigger_modules(self, capture_uuid: str, force: bool=False) -> None:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
ct = load_pickle_tree(capture_dir)
|
||||
if not ct:
|
||||
self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_dir}) is cached.')
|
||||
return
|
||||
|
@ -165,8 +219,11 @@ class Lookyloo():
|
|||
else:
|
||||
self.vt.url_lookup(ct.root_hartree.har.root_url, force)
|
||||
|
||||
def get_modules_responses(self, capture_dir: Path) -> Optional[Dict[str, Any]]:
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
def get_modules_responses(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
ct = load_pickle_tree(capture_dir)
|
||||
if not ct:
|
||||
self.logger.warning(f'Unable to get the modules responses unless the tree ({capture_dir}) is cached.')
|
||||
return None
|
||||
|
@ -219,7 +276,7 @@ class Lookyloo():
|
|||
incomplete_redirects = False
|
||||
if redirects and har.need_tree_redirects:
|
||||
# load tree from disk, get redirects
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
ct = load_pickle_tree(capture_dir)
|
||||
if ct:
|
||||
redirects = ct.redirects
|
||||
else:
|
||||
|
@ -231,6 +288,7 @@ class Lookyloo():
|
|||
'timestamp': har.initial_start_time,
|
||||
'url': har.root_url,
|
||||
'redirects': json.dumps(redirects),
|
||||
'capture_dir': str(capture_dir),
|
||||
'incomplete_redirects': 1 if incomplete_redirects else 0}
|
||||
if (capture_dir / 'no_index').exists(): # If the folders claims anonymity
|
||||
cache['no_index'] = 1
|
||||
|
@ -238,19 +296,27 @@ class Lookyloo():
|
|||
self.redis.hmset(str(capture_dir), cache)
|
||||
self.redis.hset('lookup_dirs', uuid, str(capture_dir))
|
||||
|
||||
def capture_cache(self, capture_dir: Path) -> Optional[Dict[str, Any]]:
|
||||
@property
def capture_uuids(self):
    """Keys of the ``lookup_dirs`` Redis hash, i.e. the UUIDs of the captures
    that have an entry in the cache."""
    return self.redis.hkeys('lookup_dirs')
|
||||
|
||||
def capture_cache(self, capture_uuid: str) -> Dict[str, Any]:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
if self.redis.hget(str(capture_dir), 'incomplete_redirects') == '1':
|
||||
# try to rebuild the cache
|
||||
self._set_capture_cache(capture_dir, force=True)
|
||||
cached = self.redis.hgetall(str(capture_dir))
|
||||
if all(key in cached.keys() for key in ['uuid', 'title', 'timestamp', 'url', 'redirects']):
|
||||
if all(key in cached.keys() for key in ['uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir']):
|
||||
cached['redirects'] = json.loads(cached['redirects'])
|
||||
cached['capture_dir'] = Path(cached['capture_dir'])
|
||||
return cached
|
||||
elif 'error' in cached:
|
||||
return cached
|
||||
else:
|
||||
self.logger.warning(f'Cache ({capture_dir}) is invalid: {json.dumps(cached, indent=2)}')
|
||||
return None
|
||||
return {}
|
||||
|
||||
def _init_existing_dumps(self) -> None:
|
||||
for capture_dir in self.capture_dirs:
|
||||
|
@ -270,8 +336,8 @@ class Lookyloo():
|
|||
f.write(str(uuid4()))
|
||||
return sorted(self.scrape_dir.iterdir(), reverse=True)
|
||||
|
||||
def lookup_capture_dir(self, uuid: str) -> Union[Path, None]:
|
||||
capture_dir = self.redis.hget('lookup_dirs', uuid)
|
||||
def lookup_capture_dir(self, capture_uuid: str) -> Union[Path, None]:
|
||||
capture_dir = self.redis.hget('lookup_dirs', capture_uuid)
|
||||
if capture_dir:
|
||||
return Path(capture_dir)
|
||||
return None
|
||||
|
@ -300,28 +366,20 @@ class Lookyloo():
|
|||
return True
|
||||
return False
|
||||
|
||||
def _load_pickle(self, pickle_file: Path) -> Optional[CrawledTree]:
|
||||
if pickle_file.exists():
|
||||
with pickle_file.open('rb') as _p:
|
||||
return pickle.load(_p)
|
||||
return None
|
||||
|
||||
def send_mail(self, capture_uuid: str, email: str='', comment: str='') -> None:
|
||||
if not self.get_config('enable_mail_notification'):
|
||||
return
|
||||
|
||||
redirects = ''
|
||||
initial_url = ''
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if capture_dir:
|
||||
cache = self.capture_cache(capture_dir)
|
||||
if cache:
|
||||
initial_url = cache['url']
|
||||
if 'redirects' in cache and cache['redirects']:
|
||||
redirects = "Redirects:\n"
|
||||
redirects += '\n'.join(cache['redirects'])
|
||||
else:
|
||||
redirects = "No redirects."
|
||||
cache = self.capture_cache(capture_uuid)
|
||||
if cache:
|
||||
initial_url = cache['url']
|
||||
if 'redirects' in cache and cache['redirects']:
|
||||
redirects = "Redirects:\n"
|
||||
redirects += '\n'.join(cache['redirects'])
|
||||
else:
|
||||
redirects = "No redirects."
|
||||
|
||||
email_config = self.get_config('email')
|
||||
msg = EmailMessage()
|
||||
|
@ -371,31 +429,10 @@ class Lookyloo():
|
|||
with metafile.open('w') as f:
|
||||
json.dump(to_dump, f)
|
||||
|
||||
def get_crawled_tree(self, capture_dir: Path) -> CrawledTree:
|
||||
pickle_file = capture_dir / 'tree.pickle'
|
||||
ct = self._load_pickle(pickle_file)
|
||||
if not ct:
|
||||
with open((capture_dir / 'uuid'), 'r') as f:
|
||||
uuid = f.read()
|
||||
har_files = sorted(capture_dir.glob('*.har'))
|
||||
try:
|
||||
ct = CrawledTree(har_files, uuid)
|
||||
self._ensure_meta(capture_dir, ct)
|
||||
except Har2TreeError as e:
|
||||
raise NoValidHarFile(e.message)
|
||||
with pickle_file.open('wb') as _p:
|
||||
pickle.dump(ct, _p)
|
||||
return ct
|
||||
|
||||
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
|
||||
meta = {}
|
||||
if (capture_dir / 'meta').exists():
|
||||
with open((capture_dir / 'meta'), 'r') as f:
|
||||
meta = json.load(f)
|
||||
ct = self.get_crawled_tree(capture_dir)
|
||||
return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
|
||||
|
||||
def _get_raw(self, capture_dir: Path, extension: str='*', all_files: bool=True) -> BytesIO:
|
||||
def _get_raw(self, capture_uuid: str, extension: str='*', all_files: bool=True) -> BytesIO:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
all_paths = sorted(list(capture_dir.glob(f'*.{extension}')))
|
||||
if not all_files:
|
||||
# Only get the first one in the list
|
||||
|
@ -410,17 +447,17 @@ class Lookyloo():
|
|||
to_return.seek(0)
|
||||
return to_return
|
||||
|
||||
def get_html(self, capture_dir: Path, all_html: bool=False) -> BytesIO:
|
||||
return self._get_raw(capture_dir, 'html', all_html)
|
||||
def get_html(self, capture_uuid: str, all_html: bool=False) -> BytesIO:
|
||||
return self._get_raw(capture_uuid, 'html', all_html)
|
||||
|
||||
def get_cookies(self, capture_dir: Path, all_cookies: bool=False) -> BytesIO:
|
||||
return self._get_raw(capture_dir, 'cookies.json', all_cookies)
|
||||
def get_cookies(self, capture_uuid: str, all_cookies: bool=False) -> BytesIO:
|
||||
return self._get_raw(capture_uuid, 'cookies.json', all_cookies)
|
||||
|
||||
def get_screenshot(self, capture_dir: Path, all_images: bool=False) -> BytesIO:
|
||||
return self._get_raw(capture_dir, 'png', all_images)
|
||||
def get_screenshot(self, capture_uuid: str, all_images: bool=False) -> BytesIO:
|
||||
return self._get_raw(capture_uuid, 'png', all_images)
|
||||
|
||||
def get_capture(self, capture_dir: Path) -> BytesIO:
|
||||
return self._get_raw(capture_dir)
|
||||
def get_capture(self, capture_uuid: str) -> BytesIO:
|
||||
return self._get_raw(capture_uuid)
|
||||
|
||||
def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
|
||||
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
|
||||
|
@ -505,8 +542,12 @@ class Lookyloo():
|
|||
self._set_capture_cache(dirpath)
|
||||
return perma_uuid
|
||||
|
||||
def get_hostnode_investigator(self, capture_dir: Path, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
def get_hostnode_investigator(self, capture_uuid: str, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
raise MissingUUID(f'Unable to find {capture_uuid}')
|
||||
|
||||
ct = load_pickle_tree(capture_dir)
|
||||
if not ct:
|
||||
raise MissingUUID(f'Unable to find {capture_dir}')
|
||||
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
|
||||
|
@ -536,16 +577,17 @@ class Lookyloo():
|
|||
else:
|
||||
to_append['url_path_short'] = to_append['url_path']
|
||||
|
||||
# Optional: SaneJS information
|
||||
if hasattr(url, 'body_hash') and url.body_hash in sanejs_lookups:
|
||||
if sanejs_lookups[url.body_hash]:
|
||||
if isinstance(sanejs_lookups[url.body_hash], list):
|
||||
libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
|
||||
other_files = len(sanejs_lookups[url.body_hash])
|
||||
to_append['sane_js'] = (libname, version, path, other_files)
|
||||
else:
|
||||
# Predefined generic file
|
||||
to_append['sane_js'] = sanejs_lookups[url.body_hash]
|
||||
if not url.empty_response:
|
||||
# Optional: SaneJS information
|
||||
if url.body_hash in sanejs_lookups:
|
||||
if sanejs_lookups[url.body_hash]:
|
||||
if isinstance(sanejs_lookups[url.body_hash], list):
|
||||
libname, version, path = sanejs_lookups[url.body_hash][0].split("|")
|
||||
other_files = len(sanejs_lookups[url.body_hash])
|
||||
to_append['sane_js'] = (libname, version, path, other_files)
|
||||
else:
|
||||
# Predefined generic file
|
||||
to_append['sane_js'] = sanejs_lookups[url.body_hash]
|
||||
|
||||
# Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
|
||||
if hasattr(url, 'cookies_sent'):
|
||||
|
|
|
@ -52,10 +52,16 @@ class SaneJavaScript():
|
|||
"71db01662075fac031dea18b2c766826c77dbab01400a8642cdc7059394841d5df9020076554c3beca6f808187d42e1a1acc98fad9a0e1ad32ae869145f53746": "This is a 1*1 pixel GIF",
|
||||
"49b8daf1f5ba868bc8c6b224c787a75025ca36513ef8633d1d8f34e48ee0b578f466fcc104a7bed553404ddc5f9faff3fef5f894b31cd57f32245e550fad656a": "This is a 1*1 pixel GIF",
|
||||
"c57ebbadcf59f982ba28da35fdbd5e5369a8500a2e1edad0dc9c9174de6fd99f437953732e545b95d3de5943c61077b6b949c989f49553ff2e483f68fcc30641": "This is a 1*1 pixel GIF",
|
||||
"c87bf81fd70cf6434ca3a6c05ad6e9bd3f1d96f77dddad8d45ee043b126b2cb07a5cf23b4137b9d8462cd8a9adf2b463ab6de2b38c93db72d2d511ca60e3b57e": "This is a 1*1 pixel GIF",
|
||||
"fd8b021f0236e487bfee13bf8f0ae98760abc492f7ca3023e292631979e135cb4ccb0c89b6234971b060ad72c0ca4474cbb5092c6c7a3255d81a54a36277b486": "This is a 1*1 pixel GIF",
|
||||
"235479f42cbbe0a4b0100167fece0d14c9b47d272b3ba8322bcfe8539f055bf31d500e7b2995cc968ebf73034e039f59c5f0f9410428663034bf119d74b5672c": "This is a 1*1 pixel GIF",
|
||||
"a85e09c3b5dbb560f4e03ba880047dbc8b4999a64c1f54fbfbca17ee0bcbed3bc6708d699190b56668e464a59358d6b534c3963a1329ba01db21075ef5bedace": "This is a 1*1 pixel GIF",
|
||||
"27656d6106a6da0c84174ba7a6307e6f1c4b3f2cc085c8466b6a25d54331035dabc7081aac208d960d8d37c5577547628c0d1c4b77bb4cf254c71859673feec1": "This is a 1*1 pixel GIF",
|
||||
# "": "This is a 1*1 pixel GIF",
|
||||
"f1c33e72643ce366fd578e3b5d393799e8c9ea27b180987826af43b4fc00b65a4eaae5e6426a23448956fee99e3108c6a86f32fb4896c156e24af0571a11c498": "This is a 1*1 pixel PNG",
|
||||
"dc7c40381b3d22919e32c1b700ccb77b1b0aea2690642d01c1ac802561e135c01d5a4d2a0ea18efc0ec3362e8c549814a10a23563f1f56bd62aee0ced7e2bd99": "This is a 1*1 pixel PNG",
|
||||
"c2c239cb5cdd0b670780ad6414ef6be9ccd4c21ce46bb93d1fa3120ac812f1679445162978c3df05cb2e1582a1844cc4c41cf74960b8fdae3123999c5d2176cc": "This is a 1*1 pixel PNG",
|
||||
"6ad523f5b65487369d305613366b9f68dcdeee225291766e3b25faf45439ca069f614030c08ca54c714fdbf7a944fac489b1515a8bf9e0d3191e1bcbbfe6a9df": "This is a 1*1 pixel PNG",
|
||||
# "": "This is a 1*1 pixel PNG",
|
||||
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e": "This is an empty file"
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ run_backend = "bin/run_backend.py"
|
|||
async_scrape = "bin/async_scrape.py"
|
||||
shutdown = "bin/shutdown.py"
|
||||
stop = "bin/stop.py"
|
||||
rebuild_caches = "bin/rebuild_caches.py"
|
||||
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
|
|
2
setup.py
2
setup.py
|
@ -13,7 +13,7 @@ setup(
|
|||
description='Web interface to track the trackers.',
|
||||
packages=['lookyloo'],
|
||||
scripts=['bin/start_website.py', 'bin/start.py', 'bin/run_backend.py', 'bin/async_scrape.py',
|
||||
'bin/shutdown.py', 'bin/stop.py'],
|
||||
'bin/shutdown.py', 'bin/stop.py', 'bin/rebuild_caches.py'],
|
||||
include_package_data=True,
|
||||
classifiers=[
|
||||
'License :: OSI Approved :: BSD License',
|
||||
|
|
|
@ -11,7 +11,7 @@ FileSaver="v2.0.2"
|
|||
|
||||
wget -q https://raw.githubusercontent.com/eligrey/FileSaver.js/${FileSaver}/src/FileSaver.js -O web/static/FileSaver.js
|
||||
|
||||
datatables="1.10.20"
|
||||
datatables="1.10.21"
|
||||
|
||||
wget -q https://cdn.datatables.net/v/bs4/dt-${datatables}/datatables.min.css -O web/static/datatables.min.css
|
||||
wget -q https://cdn.datatables.net/v/bs4/dt-${datatables}/datatables.min.js -O web/static/datatables.min.js
|
||||
|
|
|
@ -14,7 +14,7 @@ from flask_httpauth import HTTPDigestAuth # type: ignore
|
|||
|
||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
|
||||
from lookyloo.lookyloo import Lookyloo
|
||||
from lookyloo.exceptions import NoValidHarFile
|
||||
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
||||
from .proxied import ReverseProxied
|
||||
|
||||
from typing import Optional, Dict, Any
|
||||
|
@ -96,11 +96,11 @@ def rebuild_cache():
|
|||
@app.route('/tree/<string:tree_uuid>/rebuild')
|
||||
@auth.login_required
|
||||
def rebuild_tree(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if capture_dir:
|
||||
lookyloo.remove_pickle(capture_dir)
|
||||
try:
|
||||
lookyloo.remove_pickle(tree_uuid)
|
||||
return redirect(url_for('tree', tree_uuid=tree_uuid))
|
||||
return redirect(url_for('index'))
|
||||
except Exception:
|
||||
return redirect(url_for('index'))
|
||||
|
||||
|
||||
@app.route('/submit', methods=['POST', 'GET'])
|
||||
|
@ -140,10 +140,7 @@ def scrape_web():
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/hostname/<string:node_uuid>/text', methods=['GET'])
|
||||
def hostnode_details_text(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
|
||||
hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
|
||||
urls = []
|
||||
for url in hostnode.urls:
|
||||
urls.append(url.name)
|
||||
|
@ -159,10 +156,6 @@ def hostnode_details_text(tree_uuid: str, node_uuid: str):
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/hostname_popup/<string:node_uuid>', methods=['GET'])
|
||||
def hostnode_popup(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
|
||||
keys_response = {
|
||||
'js': "/static/javascript.png",
|
||||
'exe': "/static/exe.png",
|
||||
|
@ -182,7 +175,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
|
|||
'request_cookie': "/static/cookie_read.png",
|
||||
}
|
||||
|
||||
hostnode, urls = lookyloo.get_hostnode_investigator(capture_dir, node_uuid)
|
||||
hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid)
|
||||
|
||||
return render_template('hostname_popup.html',
|
||||
tree_uuid=tree_uuid,
|
||||
|
@ -195,10 +188,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET'])
|
||||
def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
|
||||
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
||||
if not urlnode.request_cookie:
|
||||
return
|
||||
|
||||
|
@ -208,10 +198,7 @@ def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET'])
|
||||
def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
|
||||
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
||||
if not urlnode.response_cookie:
|
||||
return
|
||||
|
||||
|
@ -221,10 +208,7 @@ def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
|
||||
def urlnode_post_request(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
|
||||
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
||||
if not urlnode.posted_data:
|
||||
return
|
||||
if isinstance(urlnode.posted_data, (dict, list)):
|
||||
|
@ -244,10 +228,7 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>', methods=['GET'])
|
||||
def urlnode_details(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
|
||||
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
||||
to_return = BytesIO()
|
||||
got_content = False
|
||||
if hasattr(urlnode, 'body'):
|
||||
|
@ -267,28 +248,19 @@ def urlnode_details(tree_uuid: str, node_uuid: str):
|
|||
@app.route('/tree/<string:tree_uuid>/trigger_modules/', defaults={'force': False})
|
||||
@app.route('/tree/<string:tree_uuid>/trigger_modules/<int:force>', methods=['GET'])
|
||||
def trigger_modules(tree_uuid: str, force: int):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
lookyloo.trigger_modules(capture_dir, True if force else False)
|
||||
lookyloo.trigger_modules(tree_uuid, True if force else False)
|
||||
return redirect(url_for('modules', tree_uuid=tree_uuid))
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
|
||||
def stats(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
stats = lookyloo.get_statistics(capture_dir)
|
||||
stats = lookyloo.get_statistics(tree_uuid)
|
||||
return render_template('statistics.html', uuid=tree_uuid, stats=stats)
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/modules', methods=['GET'])
|
||||
def modules(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
modules_responses = lookyloo.get_modules_responses(capture_dir)
|
||||
modules_responses = lookyloo.get_modules_responses(tree_uuid)
|
||||
if not modules_responses:
|
||||
return redirect(url_for('tree', tree_uuid=tree_uuid))
|
||||
|
||||
|
@ -319,50 +291,35 @@ def modules(tree_uuid: str):
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
|
||||
def image(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
to_return = lookyloo.get_screenshot(capture_dir)
|
||||
to_return = lookyloo.get_screenshot(tree_uuid)
|
||||
return send_file(to_return, mimetype='image/png',
|
||||
as_attachment=True, attachment_filename='image.png')
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
|
||||
def html(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
to_return = lookyloo.get_html(capture_dir)
|
||||
to_return = lookyloo.get_html(tree_uuid)
|
||||
return send_file(to_return, mimetype='text/html',
|
||||
as_attachment=True, attachment_filename='page.html')
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
|
||||
def cookies(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
to_return = lookyloo.get_cookies(capture_dir)
|
||||
to_return = lookyloo.get_cookies(tree_uuid)
|
||||
return send_file(to_return, mimetype='application/json',
|
||||
as_attachment=True, attachment_filename='cookies.json')
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
|
||||
def export(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
to_return = lookyloo.get_capture(capture_dir)
|
||||
to_return = lookyloo.get_capture(tree_uuid)
|
||||
return send_file(to_return, mimetype='application/zip',
|
||||
as_attachment=True, attachment_filename='capture.zip')
|
||||
|
||||
|
||||
@app.route('/redirects/<string:tree_uuid>', methods=['GET'])
|
||||
def redirects(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
cache = lookyloo.capture_cache(capture_dir)
|
||||
cache = lookyloo.capture_cache(tree_uuid)
|
||||
if not cache:
|
||||
return Response('Not available.', mimetype='text/text')
|
||||
if not cache['redirects']:
|
||||
|
@ -374,9 +331,7 @@ def redirects(tree_uuid: str):
|
|||
|
||||
@app.route('/cache_tree/<string:tree_uuid>', methods=['GET'])
|
||||
def cache_tree(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if capture_dir:
|
||||
lookyloo.load_tree(capture_dir)
|
||||
lookyloo.cache_tree(tree_uuid)
|
||||
return redirect(url_for('index'))
|
||||
|
||||
|
||||
|
@ -389,16 +344,17 @@ def send_mail(tree_uuid: str):
|
|||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>', methods=['GET'])
|
||||
def tree(tree_uuid: str):
|
||||
@app.route('/tree/<string:tree_uuid>/<string:urlnode_uuid>', methods=['GET'])
|
||||
def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
|
||||
if tree_uuid == 'False':
|
||||
flash("Unable to process your request. The domain may not exist, or splash isn't started", 'error')
|
||||
return redirect(url_for('index'))
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
try:
|
||||
cache = lookyloo.capture_cache(tree_uuid)
|
||||
except MissingUUID:
|
||||
flash(f'Unable to find this UUID ({tree_uuid}). The capture may still be ongoing, try again later.', 'error')
|
||||
return redirect(url_for('index'))
|
||||
|
||||
cache = lookyloo.capture_cache(capture_dir)
|
||||
if not cache:
|
||||
flash('Invalid cache.', 'error')
|
||||
return redirect(url_for('index'))
|
||||
|
@ -412,10 +368,12 @@ def tree(tree_uuid: str):
|
|||
enable_mail_notification = True
|
||||
else:
|
||||
enable_mail_notification = False
|
||||
tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(capture_dir)
|
||||
tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(tree_uuid)
|
||||
return render_template('tree.html', tree_json=tree_json, start_time=start_time,
|
||||
user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
|
||||
meta=meta, enable_mail_notification=enable_mail_notification)
|
||||
meta=meta, enable_mail_notification=enable_mail_notification,
|
||||
urlnode_uuid=urlnode_uuid)
|
||||
|
||||
except NoValidHarFile as e:
|
||||
return render_template('error.html', error_message=e)
|
||||
|
||||
|
@ -427,8 +385,8 @@ def index_generic(show_hidden: bool=False):
|
|||
cut_time = datetime.now() - timedelta(**time_delta_on_index)
|
||||
else:
|
||||
cut_time = None # type: ignore
|
||||
for capture_dir in lookyloo.capture_dirs:
|
||||
cached = lookyloo.capture_cache(capture_dir)
|
||||
for capture_uuid in lookyloo.capture_uuids:
|
||||
cached = lookyloo.capture_cache(capture_uuid)
|
||||
if not cached or 'error' in cached:
|
||||
continue
|
||||
if show_hidden:
|
||||
|
@ -459,15 +417,12 @@ def index():
|
|||
def index_hidden():
|
||||
return index_generic(show_hidden=True)
|
||||
|
||||
|
||||
# Query API
|
||||
|
||||
|
||||
@app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
|
||||
def json_redirects(tree_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return {'error': 'Unknown UUID, try again later.'}
|
||||
cache = lookyloo.capture_cache(capture_dir)
|
||||
cache = lookyloo.capture_cache(tree_uuid)
|
||||
if not cache:
|
||||
return {'error': 'UUID missing in cache, try again later.'}
|
||||
|
||||
|
@ -477,8 +432,8 @@ def json_redirects(tree_uuid: str):
|
|||
return to_return
|
||||
if cache['incomplete_redirects']:
|
||||
# Trigger tree build, get all redirects
|
||||
lookyloo.load_tree(capture_dir)
|
||||
cache = lookyloo.capture_cache(capture_dir)
|
||||
lookyloo.load_tree(tree_uuid)
|
||||
cache = lookyloo.capture_cache(tree_uuid)
|
||||
if cache:
|
||||
to_return['response']['redirects'] = cache['redirects']
|
||||
else:
|
||||
|
|
|
@ -77,30 +77,15 @@ function urlnode_click(d) {
|
|||
});
|
||||
};
|
||||
|
||||
d3.selection.prototype.moveToFront = function() {
|
||||
return this.each(function() {
|
||||
this.parentNode.appendChild(this);
|
||||
});
|
||||
};
|
||||
|
||||
d3.selection.prototype.moveToBack = function() {
|
||||
return this.each(function() {
|
||||
var firstChild = this.parentNode.firstChild;
|
||||
if (firstChild) {
|
||||
this.parentNode.insertBefore(this, firstChild);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
// Open the hostname popup page of the clicked node in a new 1024x768 window.
// The URL is built from the global treeUUID and the uuid stored in d.data.
function hostnode_click_popup(d) {
    window.open('/tree/' + treeUUID + '/hostname_popup/' + d.data.uuid, '_blank', 'width=1024,height=768,left=200,top=100');
};
|
||||
|
||||
function ProcessChildMessage(message) {
|
||||
var element = document.getElementById("node_" + message);
|
||||
function ProcessChildMessage(urlnode_uuid) {
|
||||
var element = document.getElementById("node_" + urlnode_uuid);
|
||||
element.scrollIntoView({behavior: "smooth", block: "center", inline: "nearest"});
|
||||
|
||||
var to_blink = d3.select("#node_" + message).select('text');
|
||||
var to_blink = d3.select("#node_" + urlnode_uuid).select('text');
|
||||
to_blink
|
||||
.transition().duration(500) //Set transition
|
||||
.style('fill', 'red')
|
||||
|
|
|
@ -105,15 +105,15 @@
|
|||
|
||||
{% if url['sane_js'] %}
|
||||
<div>
|
||||
{% if url['sane_js'] is string %}
|
||||
{{ url['sane_js'] }}
|
||||
{% else %}
|
||||
This file is known as part of <b>{{ url['sane_js'][0] }}</b>
|
||||
version <b>{{ url['sane_js'][1] }}</b>: <b>{{ url['sane_js'][2] }}</b>.
|
||||
{% if url['sane_js'][3] > 1%}
|
||||
{% if url['sane_js'] is string %}
|
||||
<b>{{ url['sane_js'] }} </b>
|
||||
{% else %}
|
||||
This file is known as part of <b>{{ url['sane_js'][0] }}</b>
|
||||
version <b>{{ url['sane_js'][1] }}</b>: <b>{{ url['sane_js'][2] }}</b>.
|
||||
{% if url['sane_js'][3] > 1%}
|
||||
It is also present in <b>{{ url['sane_js'][3] -1 }}</b> other libraries.
|
||||
{%endif%}
|
||||
{%endif%}
|
||||
{%endif%}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
|
|
@ -35,6 +35,14 @@
|
|||
});
|
||||
});
|
||||
</script>
|
||||
{% if urlnode_uuid %}
|
||||
<script>
|
||||
history.scrollRestoration = "manual";
|
||||
window.addEventListener('DOMContentLoaded', (event) => {
|
||||
ProcessChildMessage('{{urlnode_uuid}}');
|
||||
});
|
||||
</script>
|
||||
{% endif%}
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
|
|
Loading…
Reference in New Issue