Merge branch 'new_popup'

pull/79/head
Raphaël Vinot 2020-05-25 10:30:52 +02:00
commit 3e5a6cb8ab
13 changed files with 382 additions and 1913 deletions

View File

@ -3,10 +3,11 @@
"splash_loglevel": "WARNING", "splash_loglevel": "WARNING",
"only_global_lookups": true, "only_global_lookups": true,
"splash_url": "http://127.0.0.1:8050", "splash_url": "http://127.0.0.1:8050",
"default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
"cache_clean_user": {}, "cache_clean_user": {},
"time_delta_on_index": { "time_delta_on_index": {
"weeks": 0, "weeks": 1,
"days": 1, "days": 0,
"hours": 0 "hours": 0
}, },
"enable_mail_notification": false, "enable_mail_notification": false,
@ -23,6 +24,7 @@
"splash_loglevel": "(Splash) INFO is *very* verbose.", "splash_loglevel": "(Splash) INFO is *very* verbose.",
"only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network", "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
"splash_url": "URL to connect to splash", "splash_url": "URL to connect to splash",
"default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA",
"cache_clean_user": "Format: {username: password}", "cache_clean_user": "Format: {username: password}",
"time_delta_on_index": "Time interval of the capture displayed on the index", "time_delta_on_index": "Time interval of the capture displayed on the index",
"enable_mail_notification": "Enable email notification or not", "enable_mail_notification": "Enable email notification or not",

View File

@ -2,5 +2,8 @@
"VirusTotal": { "VirusTotal": {
"apikey": "KEY", "apikey": "KEY",
"autosubmit": false "autosubmit": false
},
"SaneJS": {
"enabled": true
} }
} }

43
doc/notes_papers.md Normal file
View File

@ -0,0 +1,43 @@
# AdGraph
## Implementation
* https://github.com/uiowa-irl/AdGraph
4000+ lines of patch on Chromium version 69.0.3441.0 (released 25 May 2018)
## Paper
* https://umariqbal.com/papers/adgraph-sp2020.pdf
## Key points for lookyloo
### Static, node by node
* features of the node
* keywords in URL
* keywords in content
* length & parameters of the URL
* On image: OCR (?)
* Domain => blocklists (ublock)
* Javascript analysis:
* eval
* specific keywords (tracking, ads, fingerprint...)
* specific JS calls (track mouse, scrolling)
* Async calls are very often used by ads, recommendation: https://www.iab.com/wp-content/uploads/2017/08/IABNewAdPortfolio_FINAL_2017.pdf
* /!\ anything obfuscated is just under the radar
### Dynamic, based on the tree
* size
* position in the tree
* parent features
* siblings
* number and type of children
# Other resources
* Ads standards: https://github.com/InteractiveAdvertisingBureau - https://iabtechlab.com/standards/
* Standard API for Ads bidding: https://github.com/prebid/

View File

@ -20,3 +20,7 @@ class CreateDirectoryException(LookylooException):
class ConfigError(LookylooException): class ConfigError(LookylooException):
pass pass
class MissingUUID(LookylooException):
    """Raised when a UUID cannot be mapped to an existing capture or tree node."""
    pass

View File

@ -18,14 +18,13 @@ from uuid import uuid4
from zipfile import ZipFile from zipfile import ZipFile
from defang import refang # type: ignore from defang import refang # type: ignore
from har2tree import CrawledTree, Har2TreeError, HarFile from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
from pysanejs import SaneJS
from redis import Redis from redis import Redis
from scrapysplashwrapper import crawl from scrapysplashwrapper import crawl
from .exceptions import NoValidHarFile from .exceptions import NoValidHarFile, MissingUUID
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template
from .modules import VirusTotal from .modules import VirusTotal, SaneJavaScript
class Lookyloo(): class Lookyloo():
@ -50,17 +49,14 @@ class Lookyloo():
self.vt = VirusTotal(self.configs['modules']['VirusTotal']) self.vt = VirusTotal(self.configs['modules']['VirusTotal'])
if not self.vt.available: if not self.vt.available:
self.logger.warning('Unable to setup the VirusTotal module') self.logger.warning('Unable to setup the VirusTotal module')
if 'SaneJS' in self.configs['modules']:
self.sanejs = SaneJavaScript(self.configs['modules']['SaneJS'])
if not self.sanejs.available:
self.logger.warning('Unable to setup the SaneJS module')
if not self.redis.exists('cache_loaded'): if not self.redis.exists('cache_loaded'):
self._init_existing_dumps() self._init_existing_dumps()
# Try to reach sanejs
self.sanejs = SaneJS()
if not self.sanejs.is_up:
self.use_sane_js = False
else:
self.use_sane_js = True
def rebuild_cache(self) -> None: def rebuild_cache(self) -> None:
self.redis.flushdb() self.redis.flushdb()
self._init_existing_dumps() self._init_existing_dumps()
@ -88,6 +84,18 @@ class Lookyloo():
sample_config = json.load(_c) sample_config = json.load(_c)
return sample_config[entry] return sample_config[entry]
def get_urlnode_from_tree(self, capture_dir: Path, node_uuid: str) -> URLNode:
    """Return the URL node identified by *node_uuid* from a capture's pickled tree."""
    tree = self._load_pickle(capture_dir / 'tree.pickle')
    if not tree:
        # No pickled tree for this capture -> the node cannot exist.
        raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
    return tree.root_hartree.get_url_node_by_uuid(node_uuid)
def get_hostnode_from_tree(self, capture_dir: Path, node_uuid: str) -> HostNode:
    """Return the hostname node identified by *node_uuid* from a capture's pickled tree."""
    tree = self._load_pickle(capture_dir / 'tree.pickle')
    if not tree:
        # No pickled tree for this capture -> the node cannot exist.
        raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
    return tree.root_hartree.get_host_node_by_uuid(node_uuid)
def get_statistics(self, capture_dir: Path) -> Dict[str, Any]: def get_statistics(self, capture_dir: Path) -> Dict[str, Any]:
# We need the pickle # We need the pickle
ct = self._load_pickle(capture_dir / 'tree.pickle') ct = self._load_pickle(capture_dir / 'tree.pickle')
@ -151,7 +159,7 @@ class Lookyloo():
self.redis.hset('lookup_dirs', uuid, str(capture_dir)) self.redis.hset('lookup_dirs', uuid, str(capture_dir))
return return
har = HarFile(har_files[0]) har = HarFile(har_files[0], uuid)
redirects = har.initial_redirects redirects = har.initial_redirects
incomplete_redirects = False incomplete_redirects = False
@ -268,7 +276,7 @@ class Lookyloo():
except Exception as e: except Exception as e:
logging.exception(e) logging.exception(e)
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, str, Dict[str, str]]: def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
har_files = sorted(capture_dir.glob('*.har')) har_files = sorted(capture_dir.glob('*.har'))
pickle_file = capture_dir / 'tree.pickle' pickle_file = capture_dir / 'tree.pickle'
try: try:
@ -284,7 +292,7 @@ class Lookyloo():
ct = CrawledTree(har_files, uuid) ct = CrawledTree(har_files, uuid)
with pickle_file.open('wb') as _p: with pickle_file.open('wb') as _p:
pickle.dump(ct, _p) pickle.dump(ct, _p)
return str(pickle_file), ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
except Har2TreeError as e: except Har2TreeError as e:
raise NoValidHarFile(e.message) raise NoValidHarFile(e.message)
@ -312,11 +320,6 @@ class Lookyloo():
def get_capture(self, capture_dir: Path) -> BytesIO: def get_capture(self, capture_dir: Path) -> BytesIO:
return self._get_raw(capture_dir) return self._get_raw(capture_dir)
def sane_js_query(self, sha512: str) -> Dict[str, Any]:
if self.use_sane_js:
return self.sanejs.sha512(sha512)
return {'response': []}
def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None, def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
depth: int=1, listing: bool=True, user_agent: Optional[str]=None, depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
perma_uuid: str=None, os: str=None, browser: str=None) -> Union[bool, str]: perma_uuid: str=None, os: str=None, browser: str=None) -> Union[bool, str]:
@ -339,7 +342,12 @@ class Lookyloo():
return False return False
cookies = load_cookies(cookies_pseudofile) cookies = load_cookies(cookies_pseudofile)
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent, if not user_agent:
# Catch case where the UA is broken on the UI, and the async submission.
ua: str = self.get_config('default_user_agent') # type: ignore
else:
ua = user_agent
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
log_enabled=True, log_level=self.get_config('splash_loglevel')) log_enabled=True, log_level=self.get_config('splash_loglevel'))
if not items: if not items:
# broken # broken

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from typing import Dict, Any, Optional from typing import Dict, Any, Optional, List, Union
from datetime import date from datetime import date
import hashlib import hashlib
import json import json
@ -13,6 +13,104 @@ from .helpers import get_homedir
from .exceptions import ConfigError from .exceptions import ConfigError
import vt # type: ignore import vt # type: ignore
from pysanejs import SaneJS
class SaneJavaScript():
    """Lookup module resolving file (SHA512) hashes against a SaneJS instance.

    A small set of ubiquitous files (1*1 tracking pixels, the empty file) is
    resolved locally through ``skip_lookup`` and never hits the server.
    Successful server lookups are cached on disk per day; hashes the server
    does not know are remembered in a per-day ``unknown`` file so they are
    not re-queried the same day (unless ``force`` is passed).
    """

    # SHA512 -> human-readable description of files too common to be worth
    # a server round-trip.
    skip_lookup: Dict[str, str] = {
        "717ea0ff7f3f624c268eccb244e24ec1305ab21557abb3d6f1a7e183ff68a2d28f13d1d2af926c9ef6d1fb16dd8cbe34cd98cacf79091dddc7874dcee21ecfdc": "This is a 1*1 pixel GIF",
        "e508d5d17e94d14b126164082342a9ca4774f404e87a3dd56c26812493ee18d9c3d6daacca979134a94a003066aca24116de874596d00d1e52130c1283d54209": "This is a 1*1 pixel GIF",
        "2d073e10ae40fde434eb31cbedd581a35cd763e51fb7048b88caa5f949b1e6105e37a228c235bc8976e8db58ed22149cfccf83b40ce93a28390566a28975744a": "This is a 1*1 pixel GIF",
        "84e24a70b78e9de9c9d0dfeb49f3f4247dbc1c715d8844471ee40669270682e199d48f5fbec62bd984c9c0270534b407c4d2561dd6c05adec3c83c1534f32d5c": "This is a 1*1 pixel GIF",
        "d5da26b5d496edb0221df1a4057a8b0285d15592a8f8dc7016a294df37ed335f3fde6a2252962e0df38b62847f8b771463a0124ef3f84299f262ed9d9d3cee4c": "This is a 1*1 pixel GIF",
        "f7a5f748f4c0d3096a3ca972886fe9a9dff5dce7792779ec6ffc42fa880b3815e2e4c3bdea452352f3844b81864c9bfb7861f66ac961cfa66cb9cb4febe568e8": "This is a 1*1 pixel GIF",
        "b2ca25a3311dc42942e046eb1a27038b71d689925b7d6b3ebb4d7cd2c7b9a0c7de3d10175790ac060dc3f8acf3c1708c336626be06879097f4d0ecaa7f567041": "This is a 1*1 pixel GIF",
        "b8d82d64ec656c63570b82215564929adad167e61643fd72283b94f3e448ef8ab0ad42202f3537a0da89960bbdc69498608fc6ec89502c6c338b6226c8bf5e14": "This is a 1*1 pixel GIF",
        "2991c3aa1ba61a62c1cccd990c0679a1fb8dccd547d153ec0920b91a75ba20820de1d1c206f66d083bf2585d35050f0a39cd7a3e11c03882dafec907d27a0180": "This is a 1*1 pixel GIF",
        "b1a6cfa7b21dbb0b281d241af609f3ba7f3a63e5668095bba912bf7cfd7f0320baf7c3b0bfabd0f8609448f39902baeb145ba7a2d8177fe22a6fcea03dd29be1": "This is a 1*1 pixel GIF",
        "ebfe0c0df4bcc167d5cb6ebdd379f9083df62bef63a23818e1c6adf0f64b65467ea58b7cd4d03cf0a1b1a2b07fb7b969bf35f25f1f8538cc65cf3eebdf8a0910": "This is a 1*1 pixel GIF",
        "1d68b92e8d822fe82dc7563edd7b37f3418a02a89f1a9f0454cca664c2fc2565235e0d85540ff9be0b20175be3f5b7b4eae1175067465d5cca13486aab4c582c": "This is a 1*1 pixel GIF",
        "ac44da7f455bfae52b883639964276026fb259320902aa813d0333e021c356a7b3e3537b297f9a2158e588c302987ce0854866c039d1bb0ffb27f67560739db2": "This is a 1*1 pixel GIF",
        "921944dc10fbfb6224d69f0b3ac050f4790310fd1bcac3b87c96512ad5ed9a268824f3f5180563d372642071b4704c979d209baf40bc0b1c9a714769aba7dfc7": "This is a 1*1 pixel GIF",
        "89dfc38ec77cf258362e4db7c8203cae8a02c0fe4f99265b0539ec4f810c84f8451e22c9bef1ebc59b4089af7e93e378e053c542a5967ec4912d4c1fc5de22f0": "This is a 1*1 pixel GIF",
        "280ea4383ee6b37051d91c5af30a5ce72aa4439340fc6d31a4fbe7ba8a8156eb7893891d5b2371b9fc4934a78f08de3d57e5b63fa9d279a317dcbefb8a07a6b0": "This is a 1*1 pixel GIF",
        "3844065e1dd778a05e8cc39901fbf3191ded380d594359df137901ec56ca52e03d57eb60acc2421a0ee74f0733bbb5d781b7744685c26fb013a236f49b02fed3": "This is a 1*1 pixel GIF",
        "bd9ab35dde3a5242b04c159187732e13b0a6da50ddcff7015dfb78cdd68743e191eaf5cddedd49bef7d2d5a642c217272a40e5ba603fe24ca676a53f8c417c5d": "This is a 1*1 pixel GIF",
        "d052ecec2839340876eb57247cfc2e777dd7f2e868dc37cd3f3f740c8deb94917a0c9f2a4fc8229987a0b91b04726de2d1e9f6bcbe3f9bef0e4b7e0d7f65ea12": "This is a 1*1 pixel GIF",
        "8717074ddf1198d27b9918132a550cb4ba343794cc3d304a793f9d78c9ff6c4929927b414141d40b6f6ad296725520f4c63edeb660ed530267766c2ab74ee4a9": "This is a 1*1 pixel GIF",
        "6834f1548f26b94357fcc3312a3491e8c87080a84f678f990beb2c745899a01e239964521e64a534d7d5554222f728af966ec6ec8291bc64d2005861bcfd78ec": "This is a 1*1 pixel GIF",
        "3be8176915593e79bc280d08984a16c29c495bc53be9b439276094b8dcd3764a3c72a046106a06b958e08e67451fe02743175c621a1faa261fe7a9691cc77141": "This is a 1*1 pixel GIF",
        "826225fc21717d8861a05b9d2f959539aad2d2b131b2afed75d88fbca535e1b0d5a0da8ac69713a0876a0d467848a37a0a7f926aeafad8cf28201382d16466ab": "This is a 1*1 pixel GIF",
        "202612457d9042fe853daab3ddcc1f0f960c5ffdbe8462fa435713e4d1d85ff0c3f197daf8dba15bda9f5266d7e1f9ecaeee045cbc156a4892d2f931fe6fa1bb": "This is a 1*1 pixel GIF",
        "b82c6aa1ae927ade5fadbbab478cfaef26d21c1ac441f48e69cfc04cdb779b1e46d7668b4368b933213276068e52f9060228907720492a70fd9bc897191ee77c": "This is a 1*1 pixel GIF",
        "763de1053a56a94eef4f72044adb2aa370b98ffa6e0add0b1cead7ee27da519e223921c681ae1db3311273f45d0dd3dc022d102d42ce210c90cb3e761b178438": "This is a 1*1 pixel GIF",
        "69e2da5cdc318fc237eaa243b6ea7ecc83b68dbdea8478dc69154abdda86ecb4e16c35891cc1facb3ce7e0cf19d5abf189c50f59c769777706f4558f6442abbc": "This is a 1*1 pixel GIF",
        "16dd1560fdd43c3eee7bcf622d940be93e7e74dee90286da37992d69cea844130911b97f41c71f8287b54f00bd3a388191112f490470cf27c374d524f49ba516": "This is a 1*1 pixel GIF",
        "01211111688dc2007519ff56603fbe345d057337b911c829aaee97b8d02e7d885e7a2c2d51730f54a04aebc1821897c8041f15e216f1c973ed313087fa91a3fb": "This is a 1*1 pixel GIF",
        "71db01662075fac031dea18b2c766826c77dbab01400a8642cdc7059394841d5df9020076554c3beca6f808187d42e1a1acc98fad9a0e1ad32ae869145f53746": "This is a 1*1 pixel GIF",
        # "": "This is a 1*1 pixel GIF",
        "f1c33e72643ce366fd578e3b5d393799e8c9ea27b180987826af43b4fc00b65a4eaae5e6426a23448956fee99e3108c6a86f32fb4896c156e24af0571a11c498": "This is a 1*1 pixel PNG",
        "dc7c40381b3d22919e32c1b700ccb77b1b0aea2690642d01c1ac802561e135c01d5a4d2a0ea18efc0ec3362e8c549814a10a23563f1f56bd62aee0ced7e2bd99": "This is a 1*1 pixel PNG",
        "c2c239cb5cdd0b670780ad6414ef6be9ccd4c21ce46bb93d1fa3120ac812f1679445162978c3df05cb2e1582a1844cc4c41cf74960b8fdae3123999c5d2176cc": "This is a 1*1 pixel PNG",
        # "": "This is a 1*1 pixel PNG",
        "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e": "This is an empty file"
    }

    def __init__(self, config: Dict[str, Any]):
        """Initialise the module; ``self.available`` tells callers whether it can be used.

        BUGFIX: the original test was ``not ('enabled' in config or config['enabled'])``,
        which is always falsy when the key exists (regardless of its value) and
        raises ``KeyError`` when it does not. The module must be active only
        when the config explicitly enables it.
        """
        if not config.get('enabled'):
            self.available = False
            return
        self.client = SaneJS()
        if not self.client.is_up:
            # SaneJS server unreachable: disable the module.
            self.available = False
            return
        self.available = True
        self.storage_dir = get_homedir() / 'sanejs'
        self.storage_dir.mkdir(parents=True, exist_ok=True)

    def hashes_lookup(self, sha512: Union[List[str], str], force: bool=False) -> Dict[str, Any]:
        """Resolve one or more SHA512 hashes to file/library details.

        Returns a dict mapping each resolvable hash either to a description
        string (well-known files from ``skip_lookup``) or to the SaneJS
        response payload. Unless *force* is set, hashes already cached today
        (known or unknown) are not queried again.
        """
        hashes: List[str] = [sha512] if isinstance(sha512, str) else sha512

        today_dir = self.storage_dir / date.today().isoformat()
        today_dir.mkdir(parents=True, exist_ok=True)
        sanejs_unknowns = today_dir / 'unknown'
        unknown_hashes: List[str] = []
        if sanejs_unknowns.exists():
            with sanejs_unknowns.open() as f:
                unknown_hashes = [line.strip() for line in f.readlines()]

        # Well-known files are resolved locally.
        # BUGFIX: was ``if h in sha512`` — a substring test when a single hash
        # string is passed; membership in the normalized list is what is meant.
        to_return: Dict[str, Any] = {h: details for h, details in self.skip_lookup.items() if h in hashes}
        to_lookup = [h for h in hashes if h not in self.skip_lookup]
        if not force:
            to_lookup = [h for h in to_lookup if (h not in unknown_hashes
                                                  and not (today_dir / h).exists())]
        for h in to_lookup:
            response = self.client.sha512(h)
            if 'error' in response:
                # Server not ready: abort the remaining lookups for now.
                break
            if 'response' in response and response['response']:
                cached_path = today_dir / h
                with cached_path.open('w') as f:
                    json.dump(response['response'], f)
                to_return[h] = response['response']
            else:
                unknown_hashes.append(h)

        # BUGFIX: persist the negative cache. The original read this file but
        # never updated it, so hashes unknown to the server were re-queried on
        # every call within the same day.
        with sanejs_unknowns.open('w') as f:
            f.writelines(f'{h}\n' for h in unknown_hashes)

        # Pull today's on-disk cache for anything not resolved above.
        for h in hashes:
            cached_path = today_dir / h
            if h in unknown_hashes or h in to_return:
                continue
            elif cached_path.exists():
                with cached_path.open() as f:
                    to_return[h] = json.load(f)
        return to_return
class VirusTotal(): class VirusTotal():

1689
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json
import pickle
from zipfile import ZipFile, ZIP_DEFLATED from zipfile import ZipFile, ZIP_DEFLATED
from io import BytesIO from io import BytesIO
import os import os
from pathlib import Path from pathlib import Path
from datetime import datetime, timedelta from datetime import datetime, timedelta
import json
from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response, flash from flask import Flask, render_template, request, send_file, redirect, url_for, Response, flash
from flask_bootstrap import Bootstrap # type: ignore from flask_bootstrap import Bootstrap # type: ignore
from flask_httpauth import HTTPDigestAuth # type: ignore from flask_httpauth import HTTPDigestAuth # type: ignore
@ -18,7 +17,7 @@ from lookyloo.lookyloo import Lookyloo
from lookyloo.exceptions import NoValidHarFile from lookyloo.exceptions import NoValidHarFile
from .proxied import ReverseProxied from .proxied import ReverseProxied
from typing import Tuple, Optional, Dict, Any from typing import Optional, Dict, Any
import logging import logging
@ -48,6 +47,19 @@ time_delta_on_index = lookyloo.get_config('time_delta_on_index')
logging.basicConfig(level=lookyloo.get_config('loglevel')) logging.basicConfig(level=lookyloo.get_config('loglevel'))
# Method to make sizes in bytes human readable
# Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
    """Render a byte count as a human-readable string, e.g. 2048 -> '2.0KiB'."""
    # Walk up the binary prefixes until the value fits below 1024.
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(num) < 1024.0:
            return f"{num:3.1f}{prefix}{suffix}"
        num /= 1024.0
    # Anything left is yobibyte territory.
    return f"{num:.1f}Yi{suffix}"
app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)
@auth.get_password @auth.get_password
def get_pw(username: str) -> Optional[str]: def get_pw(username: str) -> Optional[str]:
if username in user: if username in user:
@ -79,14 +91,6 @@ def rebuild_tree(tree_uuid: str):
return redirect(url_for('index')) return redirect(url_for('index'))
# keep
def load_tree(capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, Any]]:
session.clear()
temp_file_name, tree_json, tree_time, tree_ua, tree_root_url, meta = lookyloo.load_tree(capture_dir)
session["tree"] = temp_file_name
return tree_json, tree_time, tree_ua, tree_root_url, meta
@app.route('/submit', methods=['POST', 'GET']) @app.route('/submit', methods=['POST', 'GET'])
def submit(): def submit():
to_query = request.get_json(force=True) to_query = request.get_json(force=True)
@ -116,11 +120,12 @@ def scrape_web():
return render_template('scrape.html', user_agents=user_agents) return render_template('scrape.html', user_agents=user_agents)
@app.route('/tree/hostname/<string:node_uuid>/text', methods=['GET']) @app.route('/tree/<string:tree_uuid>/hostname/<string:node_uuid>/text', methods=['GET'])
def hostnode_details_text(node_uuid: str): def hostnode_details_text(tree_uuid: str, node_uuid: str):
with open(session["tree"], 'rb') as f: capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
ct = pickle.load(f) if not capture_dir:
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid) return
hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
urls = [] urls = []
for url in hostnode.urls: for url in hostnode.urls:
urls.append(url.name) urls.append(url.name)
@ -134,27 +139,85 @@ def hostnode_details_text(node_uuid: str):
as_attachment=True, attachment_filename='file.md') as_attachment=True, attachment_filename='file.md')
@app.route('/tree/hostname/<string:node_uuid>', methods=['GET']) @app.route('/tree/<string:tree_uuid>/hostname_popup/<string:node_uuid>', methods=['GET'])
def hostnode_details(node_uuid: str): def hostnode_popup(tree_uuid: str, node_uuid: str):
with open(session["tree"], 'rb') as f: capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
ct = pickle.load(f) if not capture_dir:
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid) return
hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
keys_response = {
'js': "/static/javascript.png",
'exe': "/static/exe.png",
'css': "/static/css.png",
'font': "/static/font.png",
'html': "/static/html.png",
'json': "/static/json.png",
'iframe': "/static/ifr.png",
'image': "/static/img.png",
'unknown_mimetype': "/static/wtf.png",
'video': "/static/video.png",
'response_cookie': "/static/cookie_received.png",
'redirect': "/static/redirect.png",
'redirect_to_nothing': "/static/cookie_in_url.png"
}
keys_request = {
'request_cookie': "/static/cookie_read.png",
}
urls = [] urls = []
if lookyloo.sanejs.available:
to_lookup = [url.body_hash for url in hostnode.urls if hasattr(url, 'body_hash')]
lookups = lookyloo.sanejs.hashes_lookup(to_lookup)
for url in hostnode.urls: for url in hostnode.urls:
if hasattr(url, 'body_hash'): if lookyloo.sanejs.available and hasattr(url, 'body_hash') and url.body_hash in lookups:
sane_js_r = lookyloo.sane_js_query(url.body_hash) url.add_feature('sane_js_details', lookups[url.body_hash])
if sane_js_r.get('response'): if lookups[url.body_hash]:
url.add_feature('sane_js_details', sane_js_r['response']) if isinstance(lookups[url.body_hash], list):
print('######## SANEJS ##### ', url.sane_js_details) libname, version, path = lookups[url.body_hash][0].split("|")
urls.append(url.to_json()) other_files = len(lookups[url.body_hash])
return json.dumps(urls) url.add_feature('sane_js_details_to_print', (libname, version, path, other_files))
else:
# Predefined generic file
url.add_feature('sane_js_details_to_print', lookups[url.body_hash])
urls.append(url)
return render_template('hostname_popup.html',
tree_uuid=tree_uuid,
hostname_uuid=node_uuid,
hostname=hostnode.name,
urls=urls,
keys_response=keys_response,
keys_request=keys_request)
@app.route('/tree/url/<string:node_uuid>', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
def urlnode_details(node_uuid: str): def urlnode_post_request(tree_uuid: str, node_uuid: str):
with open(session["tree"], 'rb') as f: capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
ct = pickle.load(f) if not capture_dir:
urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid) return
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
if not urlnode.posted_data:
return
if isinstance(urlnode.posted_data, (dict, list)):
# JSON blob, pretty print.
posted = json.dumps(urlnode.posted_data, indent=2)
else:
posted = urlnode.posted_data
if isinstance(posted, bytes):
to_return = BytesIO(posted)
else:
to_return = BytesIO(posted.encode())
to_return.seek(0)
return send_file(to_return, mimetype='text/plain',
as_attachment=True, attachment_filename='posted_data.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>', methods=['GET'])
def urlnode_details(tree_uuid: str, node_uuid: str):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
if not capture_dir:
return
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
to_return = BytesIO() to_return = BytesIO()
got_content = False got_content = False
if hasattr(urlnode, 'body'): if hasattr(urlnode, 'body'):
@ -300,7 +363,7 @@ def tree(tree_uuid: str):
enable_mail_notification = True enable_mail_notification = True
else: else:
enable_mail_notification = False enable_mail_notification = False
tree_json, start_time, user_agent, root_url, meta = load_tree(capture_dir) tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(capture_dir)
return render_template('tree.html', tree_json=tree_json, start_time=start_time, return render_template('tree.html', tree_json=tree_json, start_time=start_time,
user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid, user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
meta=meta, enable_mail_notification=enable_mail_notification) meta=meta, enable_mail_notification=enable_mail_notification)

View File

@ -92,163 +92,22 @@ d3.selection.prototype.moveToBack = function() {
}); });
}; };
// What happen when clicking on a domain (load a modal display) function hostnode_click_popup(d) {
function hostnode_click(d) { window.open('/tree/' + treeUUID + '/hostname_popup/' + d.data.uuid, '_blank', 'width=1024,height=768,left=200,top=100');
// Move the node to the front (end of the list) };
var cur_node = d3.select("#node_" + d.data.uuid).moveToFront();
// Avoid duplicating overlays
cur_node.selectAll('.overlay').remove();
// Insert new svg element at this position
var overlay_hostname = cur_node.append('g')
.attr('class', 'overlay');
cur_node.append('line') function ProcessChildMessage(message) {
.attr('id', 'overlay_link') var element = document.getElementById("node_" + message);
.style("opacity", "0.95") element.scrollIntoView({behavior: "smooth", block: "center", inline: "nearest"});
.attr("stroke-width", "2")
.style("stroke", "gray");
var top_margin = 15; var to_blink = d3.select("#node_" + message).select('text');
var overlay_header_height = 50; to_blink
var left_margin = 30; .transition().duration(500) //Set transition
.style('fill', 'red')
overlay_hostname .style('font-size', '20px')
.datum({x: 0, y: 0, overlay_uuid: d.data.uuid}) .transition().duration(500) //Set transition
.attr('id', 'overlay_' + d.data.uuid) .style('fill', 'black')
.attr("transform", "translate(" + 10 + "," + 15 + ")") .style('font-size', '16px');
.call(d3.drag().on("drag", function(d, i) {
if (typeof d.x === 'undefined') { d.x = 0; } // Any real JS dev would kill me fo that, right?
if (typeof d.y === 'undefined') { d.y = 0; } // Maybe even twice.
d.x += d3.event.dx
d.y += d3.event.dy
d3.select(this)
.attr("transform", "translate(" + d.x + "," + d.y + ")");
cur_node.select('#overlay_link')
.attr("x2", d.x + left_margin + 10)
.attr("y2", d.y + top_margin + 15);
}));
overlay_hostname.append('rect')
.attr("rx", 6)
.attr("ry", 6)
.attr('x', 15)
.attr('y', 10)
.style("opacity", "0.95")
.attr("stroke", "black")
.attr('stroke-opacity', "0.8")
.attr("stroke-width", "2")
.attr("stroke-linecap", "round")
.attr("fill", "white");
// Modal display
var url = "/tree/hostname/" + d.data.uuid;
d3.json(url, {credentials: 'same-origin'}).then(urls => {
overlay_hostname
.append('circle')
.attr('id', 'overlay_circle_' + d.data.uuid)
.attr('height', overlay_header_height)
.attr('cx', left_margin + 10)
.attr('cy', top_margin + 15)
.attr('r', 2);
overlay_hostname
.append('text')
.attr('id', 'overlay_close_' + d.data.uuid)
.attr('height', overlay_header_height)
.attr('x', left_margin + 500) // Value updated based on the size of the rectangle max: max_overlay_width
.attr('y', top_margin + 25)
.style("font-size", '30px')
.text('\u2716')
.attr('cursor', 'pointer')
.on("click", () => {
main_svg.selectAll('#overlay_' + d.data.uuid).remove();
cur_node.select('#overlay_link').remove();
}
);
overlay_hostname.append('line')
.attr('id', 'overlay_separator_header' + d.data.uuid)
.style("stroke", "black")
.style('stroke-width', "1px")
.attr('x1', 20)
.attr('y1', overlay_header_height)
.attr('x2', 500)
.attr('y2', overlay_header_height);
var url_entries = overlay_hostname.append('svg');
var interval_entries = 10;
urls.forEach((url, index, array) => {
var jdata = JSON.parse(url);
var url_data = url_entries.append('svg')
.attr('class', 'url_data');
url_data.datum({'data': jdata});
url_data.append(d => text_entry(left_margin, top_margin + overlay_header_height + (interval_entries * index), urlnode_click, d));
url_data.append(d => icon_list(left_margin + 5, top_margin + 20 + overlay_header_height + (interval_entries * index), d, url_view=true));
});
var cur_url_data_height = 0;
url_entries.selectAll('.url_data').each(function(p, j){
d3.select(this).attr('y', cur_url_data_height);
cur_url_data_height += d3.select(this).node().getBBox().height;
var cur_icon_list_len = 0;
// set position of icons based of their length
d3.select(this).selectAll('.icon').each(function(p, j){
d3.select(this).attr('x', cur_icon_list_len);
cur_icon_list_len += d3.select(this).node().getBBox().width;
});
});
var overlay_bbox = overlay_hostname.node().getBBox()
overlay_hostname.append('line')
.attr('id', 'overlay_separator_footer' + d.data.uuid)
.style("stroke", "black")
.style('stroke-width', "1px")
.attr('x1', 20)
.attr('y1', overlay_bbox.height + 20)
.attr('x2', 500)
.attr('y2', overlay_bbox.height + 20);
var overlay_bbox = overlay_hostname.node().getBBox()
overlay_hostname
.append('text')
.attr('id', 'overlay_download_' + d.data.uuid)
.attr('height', overlay_header_height - 10)
.attr('x', left_margin)
.attr('y', overlay_bbox.height + overlay_header_height)
.style("font-size", '20px')
.text('Download URLs as text')
.attr('cursor', 'pointer')
.on("click", () => {
var url = "/tree/hostname/" + d.data.uuid + '/text';
d3.blob(url, {credentials: 'same-origin'}).then(data => {
saveAs(data, 'file.md');
});
});
var overlay_bbox = overlay_hostname.node().getBBox();
overlay_hostname.select('rect')
.attr('width', () => {
optimal_size = overlay_bbox.width + left_margin
return optimal_size < max_overlay_width ? optimal_size : max_overlay_width;
})
.attr('height', overlay_bbox.height + overlay_header_height);
overlay_hostname.select('#overlay_close_' + d.data.uuid)
.attr('x', overlay_hostname.select('rect').node().getBBox().width - 20);
overlay_hostname.select('#overlay_separator_header' + d.data.uuid)
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 10);
overlay_hostname.select('#overlay_separator_footer' + d.data.uuid)
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 10);
cur_node.select('#overlay_link')
.attr("x1", 10)
.attr("y1", 0)
.attr("x2", left_margin + 3)
.attr("y2", top_margin + 7);
});
}; };
function icon(key, icon_path, d, icon_size){ function icon(key, icon_path, d, icon_size){
@ -483,7 +342,7 @@ function update(root, computed_node_width=0) {
// Set Hostname text // Set Hostname text
node_data node_data
.append(d => text_entry(15, 5, hostnode_click, d)); .append(d => text_entry(15, 5, hostnode_click_popup, d)); // Popup
// Set list of icons // Set list of icons
node_data node_data
.append(d => icon_list(17, 35, d)); .append(d => icon_list(17, 35, d));

View File

@ -0,0 +1,78 @@
{% extends "main.html" %}
{% block title %}Details for {{ hostname }} {% endblock %}
{% block scripts %}
<script>
function whereAmI() {
window.opener.ProcessChildMessage("{{ hostname_uuid }}");
};
</script>
{% endblock %}
{% block content %}
<center>
<h3>{{ hostname }}</h3>
<button type="button" class="btn btn-secondary" onclick="whereAmI()">Locate node on tree</button>
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostname_uuid) }}" class="btn btn-info" role="button">Get URLs as text</a>
</center>
<p>Click on the URL to get the content of the response</p>
<ul class="list-group-flush">
{% for url in urls %}
<li class="list-group-item">
<p class="h3">{{ url.name }}</p>
<ul class="list-group">
<li class="list-group-item">
<p class="h4">Response</p>
<div>
{% for key, path in keys_response.items() %}
{% if url[key] %}
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
{%endif%}
{% endfor %}
</div>
{% if not url.empty_response %}
<div>
<a href="{{ url_for('urlnode_details', tree_uuid=tree_uuid, node_uuid=url.uuid) }}">
Download response body.
</a><br/>
Body size: {{ sizeof_fmt(url.body.getbuffer().nbytes) }}
</div>
{% else %}
Empty body.
{%endif%}
{% if url.sane_js_details_to_print %}
<div>
{% if url.sane_js_details_to_print is string %}
{{ url.sane_js_details_to_print }}
{% else %}
This file is known as part of <b>{{ url.sane_js_details_to_print[0] }}</b>
version <b>{{ url.sane_js_details_to_print[1] }}</b>: <b>{{ url.sane_js_details_to_print[2] }}</b>.
{% if url.sane_js_details_to_print[3] > 1%}
It is also present in <b>{{ url.sane_js_details_to_print[3] -1 }}</b> other libraries.
{%endif%}
{%endif%}
</div>
{% endif %}
</li>
<li class="list-group-item">
<p class="h4">Request</p>
<div>
{% for key, path in keys_request.items() %}
{% if url[key] %}
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
{%endif%}
{% endfor %}
</div>
{% if url.posted_data %}
<a href="{{ url_for('urlnode_post_request', tree_uuid=tree_uuid, node_uuid=url.uuid) }}">
Download posted data
</a><br/>
Posted data size: {{ sizeof_fmt(url.posted_data|length) }}
{% endif %}
</li>
</ul>
</li>
{% endfor %}
</ul>
{% endblock %}

View File

@ -9,6 +9,14 @@
</center> </center>
</br> </br>
<form role="form" action="scrape" method=post enctype=multipart/form-data> <form role="form" action="scrape" method=post enctype=multipart/form-data>
<div class="form-group row">
<div class="col-sm-10">
<div class="form-check">
<input class="form-check-input" type="checkbox" name="listing" checked="true"></input>
<label for="listing" class="form-check-label">Display results on public page</label>
</div>
</div>
</div>
<div class="form-group row"> <div class="form-group row">
<label for="url" class="col-sm-2 col-form-label">URL:</label> <label for="url" class="col-sm-2 col-form-label">URL:</label>
<div class="col-sm-10"> <div class="col-sm-10">
@ -33,14 +41,6 @@
</div> </div>
</div> </div>
<div class="form-group row">
<div class="col-sm-10">
<div class="form-check">
<input class="form-check-input"type="checkbox" name="listing" checked="true"></input>
<label for="listing" class="form-check-label">Display on main page</label>
</div>
</div>
</div>
<div class="form-group row"> <div class="form-group row">
<label for="cookies" class="col-sm-6">Cookies (JSON export from the Firefox plugin Cookie Quick Manager)</label> <label for="cookies" class="col-sm-6">Cookies (JSON export from the Firefox plugin Cookie Quick Manager)</label>
<div class="col-sm-4"> <div class="col-sm-4">
@ -89,7 +89,7 @@
{% endfor%} {% endfor%}
{% endfor%} {% endfor%}
<div class="dropdown-divider"></div> <div class="dropdown-divider"></div>
<center><button type="submit" class="btn btn-default">Start looking!</button></center> <center><button type="submit" class="btn btn-primary">Start looking!</button></center>
</form> </form>
</div> </div>
{% endblock %} {% endblock %}

View File

@ -4,7 +4,6 @@
<dl class="row"> <dl class="row">
<dt class="col-sm-3">{{ key }}</dt> <dt class="col-sm-3">{{ key }}</dt>
<dd class="col-sm-3">{{ value }}</dd> <dd class="col-sm-3">{{ value }}</dd>
</center>
</dl> </dl>
{% endfor %} {% endfor %}
{% endif%} {% endif%}

View File

@ -40,6 +40,7 @@
{% block content %} {% block content %}
{{super()}} {{super()}}
<script> <script>
var treeUUID = "{{ tree_uuid }}";
var treeData = {{ tree_json | safe }}; var treeData = {{ tree_json | safe }};
</script> </script>