Merge branch 'new_popup'

pull/79/head
Raphaël Vinot 2020-05-25 10:30:52 +02:00
commit 3e5a6cb8ab
13 changed files with 382 additions and 1913 deletions

View File

@ -3,10 +3,11 @@
"splash_loglevel": "WARNING",
"only_global_lookups": true,
"splash_url": "http://127.0.0.1:8050",
"default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
"cache_clean_user": {},
"time_delta_on_index": {
"weeks": 0,
"days": 1,
"weeks": 1,
"days": 0,
"hours": 0
},
"enable_mail_notification": false,
@ -23,6 +24,7 @@
"splash_loglevel": "(Splash) INFO is *very* verbose.",
"only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
"splash_url": "URL to connect to splash",
"default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA",
"cache_clean_user": "Format: {username: password}",
"time_delta_on_index": "Time interval of the capture displayed on the index",
"enable_mail_notification": "Enable email notification or not",

View File

@ -2,5 +2,8 @@
"VirusTotal": {
"apikey": "KEY",
"autosubmit": false
},
"SaneJS": {
"enabled": true
}
}

43
doc/notes_papers.md Normal file
View File

@ -0,0 +1,43 @@
# AdGraph
## Implementation
* https://github.com/uiowa-irl/AdGraph
4000+ lines of patch on Chromium version 69.0.3441.0 (released 25 May 2018)
## Paper
* https://umariqbal.com/papers/adgraph-sp2020.pdf
## Key points for lookyloo
### Static, node by node
* features of the node
* keywords in URL
* keywords in content
* length & parameters of the URL
* On image: OCR (?)
* Domain => blocklists (ublock)
* Javascript analysis:
* eval
* specific keywords (tracking, ads, fingerprint...)
* specific JS calls (track mouse, scrolling)
* Async calls are very often used by ads, recommendation: https://www.iab.com/wp-content/uploads/2017/08/IABNewAdPortfolio_FINAL_2017.pdf
* /!\ anything obfuscated is just under the radar
### Dynamic, based on the tree
* size
* position in the tree
* parent features
* siblings
* number and type of children
# Other resources
* Ads standards: https://github.com/InteractiveAdvertisingBureau - https://iabtechlab.com/standards/
* Standard API for Ads bidding: https://github.com/prebid/

View File

@ -20,3 +20,7 @@ class CreateDirectoryException(LookylooException):
class ConfigError(LookylooException):
pass
class MissingUUID(LookylooException):
    """Raised when a UUID cannot be resolved to a known capture or tree node."""
    pass

View File

@ -18,14 +18,13 @@ from uuid import uuid4
from zipfile import ZipFile
from defang import refang # type: ignore
from har2tree import CrawledTree, Har2TreeError, HarFile
from pysanejs import SaneJS
from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
from redis import Redis
from scrapysplashwrapper import crawl
from .exceptions import NoValidHarFile
from .exceptions import NoValidHarFile, MissingUUID
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template
from .modules import VirusTotal
from .modules import VirusTotal, SaneJavaScript
class Lookyloo():
@ -50,17 +49,14 @@ class Lookyloo():
self.vt = VirusTotal(self.configs['modules']['VirusTotal'])
if not self.vt.available:
self.logger.warning('Unable to setup the VirusTotal module')
if 'SaneJS' in self.configs['modules']:
self.sanejs = SaneJavaScript(self.configs['modules']['SaneJS'])
if not self.sanejs.available:
self.logger.warning('Unable to setup the SaneJS module')
if not self.redis.exists('cache_loaded'):
self._init_existing_dumps()
# Try to reach sanejs
self.sanejs = SaneJS()
if not self.sanejs.is_up:
self.use_sane_js = False
else:
self.use_sane_js = True
def rebuild_cache(self) -> None:
self.redis.flushdb()
self._init_existing_dumps()
@ -88,6 +84,18 @@ class Lookyloo():
sample_config = json.load(_c)
return sample_config[entry]
def get_urlnode_from_tree(self, capture_dir: Path, node_uuid: str) -> URLNode:
ct = self._load_pickle(capture_dir / 'tree.pickle')
if not ct:
raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
return ct.root_hartree.get_url_node_by_uuid(node_uuid)
def get_hostnode_from_tree(self, capture_dir: Path, node_uuid: str) -> HostNode:
ct = self._load_pickle(capture_dir / 'tree.pickle')
if not ct:
raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
return ct.root_hartree.get_host_node_by_uuid(node_uuid)
def get_statistics(self, capture_dir: Path) -> Dict[str, Any]:
# We need the pickle
ct = self._load_pickle(capture_dir / 'tree.pickle')
@ -151,7 +159,7 @@ class Lookyloo():
self.redis.hset('lookup_dirs', uuid, str(capture_dir))
return
har = HarFile(har_files[0])
har = HarFile(har_files[0], uuid)
redirects = har.initial_redirects
incomplete_redirects = False
@ -268,7 +276,7 @@ class Lookyloo():
except Exception as e:
logging.exception(e)
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, str, Dict[str, str]]:
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
har_files = sorted(capture_dir.glob('*.har'))
pickle_file = capture_dir / 'tree.pickle'
try:
@ -284,7 +292,7 @@ class Lookyloo():
ct = CrawledTree(har_files, uuid)
with pickle_file.open('wb') as _p:
pickle.dump(ct, _p)
return str(pickle_file), ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
except Har2TreeError as e:
raise NoValidHarFile(e.message)
@ -312,11 +320,6 @@ class Lookyloo():
def get_capture(self, capture_dir: Path) -> BytesIO:
return self._get_raw(capture_dir)
def sane_js_query(self, sha512: str) -> Dict[str, Any]:
if self.use_sane_js:
return self.sanejs.sha512(sha512)
return {'response': []}
def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
perma_uuid: str=None, os: str=None, browser: str=None) -> Union[bool, str]:
@ -339,7 +342,12 @@ class Lookyloo():
return False
cookies = load_cookies(cookies_pseudofile)
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent,
if not user_agent:
# Catch case where the UA is broken on the UI, and the async submission.
ua: str = self.get_config('default_user_agent') # type: ignore
else:
ua = user_agent
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
log_enabled=True, log_level=self.get_config('splash_loglevel'))
if not items:
# broken

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Dict, Any, Optional
from typing import Dict, Any, Optional, List, Union
from datetime import date
import hashlib
import json
@ -13,6 +13,104 @@ from .helpers import get_homedir
from .exceptions import ConfigError
import vt # type: ignore
from pysanejs import SaneJS
class SaneJavaScript():
skip_lookup: Dict[str, str] = {
"717ea0ff7f3f624c268eccb244e24ec1305ab21557abb3d6f1a7e183ff68a2d28f13d1d2af926c9ef6d1fb16dd8cbe34cd98cacf79091dddc7874dcee21ecfdc": "This is a 1*1 pixel GIF",
"e508d5d17e94d14b126164082342a9ca4774f404e87a3dd56c26812493ee18d9c3d6daacca979134a94a003066aca24116de874596d00d1e52130c1283d54209": "This is a 1*1 pixel GIF",
"2d073e10ae40fde434eb31cbedd581a35cd763e51fb7048b88caa5f949b1e6105e37a228c235bc8976e8db58ed22149cfccf83b40ce93a28390566a28975744a": "This is a 1*1 pixel GIF",
"84e24a70b78e9de9c9d0dfeb49f3f4247dbc1c715d8844471ee40669270682e199d48f5fbec62bd984c9c0270534b407c4d2561dd6c05adec3c83c1534f32d5c": "This is a 1*1 pixel GIF",
"d5da26b5d496edb0221df1a4057a8b0285d15592a8f8dc7016a294df37ed335f3fde6a2252962e0df38b62847f8b771463a0124ef3f84299f262ed9d9d3cee4c": "This is a 1*1 pixel GIF",
"f7a5f748f4c0d3096a3ca972886fe9a9dff5dce7792779ec6ffc42fa880b3815e2e4c3bdea452352f3844b81864c9bfb7861f66ac961cfa66cb9cb4febe568e8": "This is a 1*1 pixel GIF",
"b2ca25a3311dc42942e046eb1a27038b71d689925b7d6b3ebb4d7cd2c7b9a0c7de3d10175790ac060dc3f8acf3c1708c336626be06879097f4d0ecaa7f567041": "This is a 1*1 pixel GIF",
"b8d82d64ec656c63570b82215564929adad167e61643fd72283b94f3e448ef8ab0ad42202f3537a0da89960bbdc69498608fc6ec89502c6c338b6226c8bf5e14": "This is a 1*1 pixel GIF",
"2991c3aa1ba61a62c1cccd990c0679a1fb8dccd547d153ec0920b91a75ba20820de1d1c206f66d083bf2585d35050f0a39cd7a3e11c03882dafec907d27a0180": "This is a 1*1 pixel GIF",
"b1a6cfa7b21dbb0b281d241af609f3ba7f3a63e5668095bba912bf7cfd7f0320baf7c3b0bfabd0f8609448f39902baeb145ba7a2d8177fe22a6fcea03dd29be1": "This is a 1*1 pixel GIF",
"ebfe0c0df4bcc167d5cb6ebdd379f9083df62bef63a23818e1c6adf0f64b65467ea58b7cd4d03cf0a1b1a2b07fb7b969bf35f25f1f8538cc65cf3eebdf8a0910": "This is a 1*1 pixel GIF",
"1d68b92e8d822fe82dc7563edd7b37f3418a02a89f1a9f0454cca664c2fc2565235e0d85540ff9be0b20175be3f5b7b4eae1175067465d5cca13486aab4c582c": "This is a 1*1 pixel GIF",
"ac44da7f455bfae52b883639964276026fb259320902aa813d0333e021c356a7b3e3537b297f9a2158e588c302987ce0854866c039d1bb0ffb27f67560739db2": "This is a 1*1 pixel GIF",
"921944dc10fbfb6224d69f0b3ac050f4790310fd1bcac3b87c96512ad5ed9a268824f3f5180563d372642071b4704c979d209baf40bc0b1c9a714769aba7dfc7": "This is a 1*1 pixel GIF",
"89dfc38ec77cf258362e4db7c8203cae8a02c0fe4f99265b0539ec4f810c84f8451e22c9bef1ebc59b4089af7e93e378e053c542a5967ec4912d4c1fc5de22f0": "This is a 1*1 pixel GIF",
"280ea4383ee6b37051d91c5af30a5ce72aa4439340fc6d31a4fbe7ba8a8156eb7893891d5b2371b9fc4934a78f08de3d57e5b63fa9d279a317dcbefb8a07a6b0": "This is a 1*1 pixel GIF",
"3844065e1dd778a05e8cc39901fbf3191ded380d594359df137901ec56ca52e03d57eb60acc2421a0ee74f0733bbb5d781b7744685c26fb013a236f49b02fed3": "This is a 1*1 pixel GIF",
"bd9ab35dde3a5242b04c159187732e13b0a6da50ddcff7015dfb78cdd68743e191eaf5cddedd49bef7d2d5a642c217272a40e5ba603fe24ca676a53f8c417c5d": "This is a 1*1 pixel GIF",
"d052ecec2839340876eb57247cfc2e777dd7f2e868dc37cd3f3f740c8deb94917a0c9f2a4fc8229987a0b91b04726de2d1e9f6bcbe3f9bef0e4b7e0d7f65ea12": "This is a 1*1 pixel GIF",
"8717074ddf1198d27b9918132a550cb4ba343794cc3d304a793f9d78c9ff6c4929927b414141d40b6f6ad296725520f4c63edeb660ed530267766c2ab74ee4a9": "This is a 1*1 pixel GIF",
"6834f1548f26b94357fcc3312a3491e8c87080a84f678f990beb2c745899a01e239964521e64a534d7d5554222f728af966ec6ec8291bc64d2005861bcfd78ec": "This is a 1*1 pixel GIF",
"3be8176915593e79bc280d08984a16c29c495bc53be9b439276094b8dcd3764a3c72a046106a06b958e08e67451fe02743175c621a1faa261fe7a9691cc77141": "This is a 1*1 pixel GIF",
"826225fc21717d8861a05b9d2f959539aad2d2b131b2afed75d88fbca535e1b0d5a0da8ac69713a0876a0d467848a37a0a7f926aeafad8cf28201382d16466ab": "This is a 1*1 pixel GIF",
"202612457d9042fe853daab3ddcc1f0f960c5ffdbe8462fa435713e4d1d85ff0c3f197daf8dba15bda9f5266d7e1f9ecaeee045cbc156a4892d2f931fe6fa1bb": "This is a 1*1 pixel GIF",
"b82c6aa1ae927ade5fadbbab478cfaef26d21c1ac441f48e69cfc04cdb779b1e46d7668b4368b933213276068e52f9060228907720492a70fd9bc897191ee77c": "This is a 1*1 pixel GIF",
"763de1053a56a94eef4f72044adb2aa370b98ffa6e0add0b1cead7ee27da519e223921c681ae1db3311273f45d0dd3dc022d102d42ce210c90cb3e761b178438": "This is a 1*1 pixel GIF",
"69e2da5cdc318fc237eaa243b6ea7ecc83b68dbdea8478dc69154abdda86ecb4e16c35891cc1facb3ce7e0cf19d5abf189c50f59c769777706f4558f6442abbc": "This is a 1*1 pixel GIF",
"16dd1560fdd43c3eee7bcf622d940be93e7e74dee90286da37992d69cea844130911b97f41c71f8287b54f00bd3a388191112f490470cf27c374d524f49ba516": "This is a 1*1 pixel GIF",
"01211111688dc2007519ff56603fbe345d057337b911c829aaee97b8d02e7d885e7a2c2d51730f54a04aebc1821897c8041f15e216f1c973ed313087fa91a3fb": "This is a 1*1 pixel GIF",
"71db01662075fac031dea18b2c766826c77dbab01400a8642cdc7059394841d5df9020076554c3beca6f808187d42e1a1acc98fad9a0e1ad32ae869145f53746": "This is a 1*1 pixel GIF",
# "": "This is a 1*1 pixel GIF",
"f1c33e72643ce366fd578e3b5d393799e8c9ea27b180987826af43b4fc00b65a4eaae5e6426a23448956fee99e3108c6a86f32fb4896c156e24af0571a11c498": "This is a 1*1 pixel PNG",
"dc7c40381b3d22919e32c1b700ccb77b1b0aea2690642d01c1ac802561e135c01d5a4d2a0ea18efc0ec3362e8c549814a10a23563f1f56bd62aee0ced7e2bd99": "This is a 1*1 pixel PNG",
"c2c239cb5cdd0b670780ad6414ef6be9ccd4c21ce46bb93d1fa3120ac812f1679445162978c3df05cb2e1582a1844cc4c41cf74960b8fdae3123999c5d2176cc": "This is a 1*1 pixel PNG",
# "": "This is a 1*1 pixel PNG",
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e": "This is an empty file"
}
def __init__(self, config: Dict[str, Any]):
if not ('enabled' in config or config['enabled']):
self.available = False
return
self.client = SaneJS()
if not self.client.is_up:
self.available = False
return
self.available = True
self.storage_dir = get_homedir() / 'sanejs'
self.storage_dir.mkdir(parents=True, exist_ok=True)
def hashes_lookup(self, sha512: Union[List[str], str], force: bool=False) -> Dict[str, Any]:
if isinstance(sha512, str):
hashes = [sha512]
else:
hashes = sha512
today_dir = self.storage_dir / date.today().isoformat()
today_dir.mkdir(parents=True, exist_ok=True)
sanejs_unknowns = today_dir / 'unknown'
unknown_hashes = []
if sanejs_unknowns.exists():
with sanejs_unknowns.open() as f:
unknown_hashes = [line.strip() for line in f.readlines()]
to_return = {h: details for h, details in self.skip_lookup.items() if h in sha512}
to_lookup = [h for h in hashes if h not in self.skip_lookup]
if not force:
to_lookup = [h for h in to_lookup if (h not in unknown_hashes
and not (today_dir / h).exists())]
for h in to_lookup:
response = self.client.sha512(h)
if 'error' in response:
# Server not ready
break
if 'response' in response and response['response']:
cached_path = today_dir / h
with cached_path.open('w') as f:
json.dump(response['response'], f)
to_return[h] = response['response']
else:
unknown_hashes.append(h)
for h in hashes:
cached_path = today_dir / h
if h in unknown_hashes or h in to_return:
continue
elif cached_path.exists():
with cached_path.open() as f:
to_return[h] = json.load(f)
return to_return
class VirusTotal():

1689
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +1,14 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import pickle
from zipfile import ZipFile, ZIP_DEFLATED
from io import BytesIO
import os
from pathlib import Path
from datetime import datetime, timedelta
import json
from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response, flash
from flask import Flask, render_template, request, send_file, redirect, url_for, Response, flash
from flask_bootstrap import Bootstrap # type: ignore
from flask_httpauth import HTTPDigestAuth # type: ignore
@ -18,7 +17,7 @@ from lookyloo.lookyloo import Lookyloo
from lookyloo.exceptions import NoValidHarFile
from .proxied import ReverseProxied
from typing import Tuple, Optional, Dict, Any
from typing import Optional, Dict, Any
import logging
@ -48,6 +47,19 @@ time_delta_on_index = lookyloo.get_config('time_delta_on_index')
logging.basicConfig(level=lookyloo.get_config('loglevel'))
# Method to make sizes in bytes human readable
# Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
    """Return *num* bytes as a human-readable string with binary prefixes.

    Source idea: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
    """
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(num) < 1024.0:
            return f"{num:3.1f}{prefix}{suffix}"
        num /= 1024.0
    # Beyond zebibytes: fall through to the largest prefix.
    return f"{num:.1f}Yi{suffix}"
app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)
@auth.get_password
def get_pw(username: str) -> Optional[str]:
if username in user:
@ -79,14 +91,6 @@ def rebuild_tree(tree_uuid: str):
return redirect(url_for('index'))
# keep
def load_tree(capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, Any]]:
session.clear()
temp_file_name, tree_json, tree_time, tree_ua, tree_root_url, meta = lookyloo.load_tree(capture_dir)
session["tree"] = temp_file_name
return tree_json, tree_time, tree_ua, tree_root_url, meta
@app.route('/submit', methods=['POST', 'GET'])
def submit():
to_query = request.get_json(force=True)
@ -116,11 +120,12 @@ def scrape_web():
return render_template('scrape.html', user_agents=user_agents)
@app.route('/tree/hostname/<string:node_uuid>/text', methods=['GET'])
def hostnode_details_text(node_uuid: str):
with open(session["tree"], 'rb') as f:
ct = pickle.load(f)
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
@app.route('/tree/<string:tree_uuid>/hostname/<string:node_uuid>/text', methods=['GET'])
def hostnode_details_text(tree_uuid: str, node_uuid: str):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
if not capture_dir:
return
hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
urls = []
for url in hostnode.urls:
urls.append(url.name)
@ -134,27 +139,85 @@ def hostnode_details_text(node_uuid: str):
as_attachment=True, attachment_filename='file.md')
@app.route('/tree/hostname/<string:node_uuid>', methods=['GET'])
def hostnode_details(node_uuid: str):
with open(session["tree"], 'rb') as f:
ct = pickle.load(f)
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
@app.route('/tree/<string:tree_uuid>/hostname_popup/<string:node_uuid>', methods=['GET'])
def hostnode_popup(tree_uuid: str, node_uuid: str):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
if not capture_dir:
return
hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
keys_response = {
'js': "/static/javascript.png",
'exe': "/static/exe.png",
'css': "/static/css.png",
'font': "/static/font.png",
'html': "/static/html.png",
'json': "/static/json.png",
'iframe': "/static/ifr.png",
'image': "/static/img.png",
'unknown_mimetype': "/static/wtf.png",
'video': "/static/video.png",
'response_cookie': "/static/cookie_received.png",
'redirect': "/static/redirect.png",
'redirect_to_nothing': "/static/cookie_in_url.png"
}
keys_request = {
'request_cookie': "/static/cookie_read.png",
}
urls = []
if lookyloo.sanejs.available:
to_lookup = [url.body_hash for url in hostnode.urls if hasattr(url, 'body_hash')]
lookups = lookyloo.sanejs.hashes_lookup(to_lookup)
for url in hostnode.urls:
if hasattr(url, 'body_hash'):
sane_js_r = lookyloo.sane_js_query(url.body_hash)
if sane_js_r.get('response'):
url.add_feature('sane_js_details', sane_js_r['response'])
print('######## SANEJS ##### ', url.sane_js_details)
urls.append(url.to_json())
return json.dumps(urls)
if lookyloo.sanejs.available and hasattr(url, 'body_hash') and url.body_hash in lookups:
url.add_feature('sane_js_details', lookups[url.body_hash])
if lookups[url.body_hash]:
if isinstance(lookups[url.body_hash], list):
libname, version, path = lookups[url.body_hash][0].split("|")
other_files = len(lookups[url.body_hash])
url.add_feature('sane_js_details_to_print', (libname, version, path, other_files))
else:
# Predefined generic file
url.add_feature('sane_js_details_to_print', lookups[url.body_hash])
urls.append(url)
return render_template('hostname_popup.html',
tree_uuid=tree_uuid,
hostname_uuid=node_uuid,
hostname=hostnode.name,
urls=urls,
keys_response=keys_response,
keys_request=keys_request)
@app.route('/tree/url/<string:node_uuid>', methods=['GET'])
def urlnode_details(node_uuid: str):
with open(session["tree"], 'rb') as f:
ct = pickle.load(f)
urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
def urlnode_post_request(tree_uuid: str, node_uuid: str):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
if not capture_dir:
return
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
if not urlnode.posted_data:
return
if isinstance(urlnode.posted_data, (dict, list)):
# JSON blob, pretty print.
posted = json.dumps(urlnode.posted_data, indent=2)
else:
posted = urlnode.posted_data
if isinstance(posted, bytes):
to_return = BytesIO(posted)
else:
to_return = BytesIO(posted.encode())
to_return.seek(0)
return send_file(to_return, mimetype='text/plain',
as_attachment=True, attachment_filename='posted_data.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>', methods=['GET'])
def urlnode_details(tree_uuid: str, node_uuid: str):
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
if not capture_dir:
return
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
to_return = BytesIO()
got_content = False
if hasattr(urlnode, 'body'):
@ -300,7 +363,7 @@ def tree(tree_uuid: str):
enable_mail_notification = True
else:
enable_mail_notification = False
tree_json, start_time, user_agent, root_url, meta = load_tree(capture_dir)
tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(capture_dir)
return render_template('tree.html', tree_json=tree_json, start_time=start_time,
user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
meta=meta, enable_mail_notification=enable_mail_notification)

View File

@ -92,163 +92,22 @@ d3.selection.prototype.moveToBack = function() {
});
};
// What happen when clicking on a domain (load a modal display)
function hostnode_click(d) {
// Move the node to the front (end of the list)
var cur_node = d3.select("#node_" + d.data.uuid).moveToFront();
// Avoid duplicating overlays
cur_node.selectAll('.overlay').remove();
// Insert new svg element at this position
var overlay_hostname = cur_node.append('g')
.attr('class', 'overlay');
function hostnode_click_popup(d) {
window.open('/tree/' + treeUUID + '/hostname_popup/' + d.data.uuid, '_blank', 'width=1024,height=768,left=200,top=100');
};
cur_node.append('line')
.attr('id', 'overlay_link')
.style("opacity", "0.95")
.attr("stroke-width", "2")
.style("stroke", "gray");
function ProcessChildMessage(message) {
var element = document.getElementById("node_" + message);
element.scrollIntoView({behavior: "smooth", block: "center", inline: "nearest"});
var top_margin = 15;
var overlay_header_height = 50;
var left_margin = 30;
overlay_hostname
.datum({x: 0, y: 0, overlay_uuid: d.data.uuid})
.attr('id', 'overlay_' + d.data.uuid)
.attr("transform", "translate(" + 10 + "," + 15 + ")")
.call(d3.drag().on("drag", function(d, i) {
if (typeof d.x === 'undefined') { d.x = 0; } // Any real JS dev would kill me fo that, right?
if (typeof d.y === 'undefined') { d.y = 0; } // Maybe even twice.
d.x += d3.event.dx
d.y += d3.event.dy
d3.select(this)
.attr("transform", "translate(" + d.x + "," + d.y + ")");
cur_node.select('#overlay_link')
.attr("x2", d.x + left_margin + 10)
.attr("y2", d.y + top_margin + 15);
}));
overlay_hostname.append('rect')
.attr("rx", 6)
.attr("ry", 6)
.attr('x', 15)
.attr('y', 10)
.style("opacity", "0.95")
.attr("stroke", "black")
.attr('stroke-opacity', "0.8")
.attr("stroke-width", "2")
.attr("stroke-linecap", "round")
.attr("fill", "white");
// Modal display
var url = "/tree/hostname/" + d.data.uuid;
d3.json(url, {credentials: 'same-origin'}).then(urls => {
overlay_hostname
.append('circle')
.attr('id', 'overlay_circle_' + d.data.uuid)
.attr('height', overlay_header_height)
.attr('cx', left_margin + 10)
.attr('cy', top_margin + 15)
.attr('r', 2);
overlay_hostname
.append('text')
.attr('id', 'overlay_close_' + d.data.uuid)
.attr('height', overlay_header_height)
.attr('x', left_margin + 500) // Value updated based on the size of the rectangle max: max_overlay_width
.attr('y', top_margin + 25)
.style("font-size", '30px')
.text('\u2716')
.attr('cursor', 'pointer')
.on("click", () => {
main_svg.selectAll('#overlay_' + d.data.uuid).remove();
cur_node.select('#overlay_link').remove();
}
);
overlay_hostname.append('line')
.attr('id', 'overlay_separator_header' + d.data.uuid)
.style("stroke", "black")
.style('stroke-width', "1px")
.attr('x1', 20)
.attr('y1', overlay_header_height)
.attr('x2', 500)
.attr('y2', overlay_header_height);
var url_entries = overlay_hostname.append('svg');
var interval_entries = 10;
urls.forEach((url, index, array) => {
var jdata = JSON.parse(url);
var url_data = url_entries.append('svg')
.attr('class', 'url_data');
url_data.datum({'data': jdata});
url_data.append(d => text_entry(left_margin, top_margin + overlay_header_height + (interval_entries * index), urlnode_click, d));
url_data.append(d => icon_list(left_margin + 5, top_margin + 20 + overlay_header_height + (interval_entries * index), d, url_view=true));
});
var cur_url_data_height = 0;
url_entries.selectAll('.url_data').each(function(p, j){
d3.select(this).attr('y', cur_url_data_height);
cur_url_data_height += d3.select(this).node().getBBox().height;
var cur_icon_list_len = 0;
// set position of icons based of their length
d3.select(this).selectAll('.icon').each(function(p, j){
d3.select(this).attr('x', cur_icon_list_len);
cur_icon_list_len += d3.select(this).node().getBBox().width;
});
});
var overlay_bbox = overlay_hostname.node().getBBox()
overlay_hostname.append('line')
.attr('id', 'overlay_separator_footer' + d.data.uuid)
.style("stroke", "black")
.style('stroke-width', "1px")
.attr('x1', 20)
.attr('y1', overlay_bbox.height + 20)
.attr('x2', 500)
.attr('y2', overlay_bbox.height + 20);
var overlay_bbox = overlay_hostname.node().getBBox()
overlay_hostname
.append('text')
.attr('id', 'overlay_download_' + d.data.uuid)
.attr('height', overlay_header_height - 10)
.attr('x', left_margin)
.attr('y', overlay_bbox.height + overlay_header_height)
.style("font-size", '20px')
.text('Download URLs as text')
.attr('cursor', 'pointer')
.on("click", () => {
var url = "/tree/hostname/" + d.data.uuid + '/text';
d3.blob(url, {credentials: 'same-origin'}).then(data => {
saveAs(data, 'file.md');
});
});
var overlay_bbox = overlay_hostname.node().getBBox();
overlay_hostname.select('rect')
.attr('width', () => {
optimal_size = overlay_bbox.width + left_margin
return optimal_size < max_overlay_width ? optimal_size : max_overlay_width;
})
.attr('height', overlay_bbox.height + overlay_header_height);
overlay_hostname.select('#overlay_close_' + d.data.uuid)
.attr('x', overlay_hostname.select('rect').node().getBBox().width - 20);
overlay_hostname.select('#overlay_separator_header' + d.data.uuid)
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 10);
overlay_hostname.select('#overlay_separator_footer' + d.data.uuid)
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 10);
cur_node.select('#overlay_link')
.attr("x1", 10)
.attr("y1", 0)
.attr("x2", left_margin + 3)
.attr("y2", top_margin + 7);
});
var to_blink = d3.select("#node_" + message).select('text');
to_blink
.transition().duration(500) //Set transition
.style('fill', 'red')
.style('font-size', '20px')
.transition().duration(500) //Set transition
.style('fill', 'black')
.style('font-size', '16px');
};
function icon(key, icon_path, d, icon_size){
@ -483,7 +342,7 @@ function update(root, computed_node_width=0) {
// Set Hostname text
node_data
.append(d => text_entry(15, 5, hostnode_click, d));
.append(d => text_entry(15, 5, hostnode_click_popup, d)); // Popup
// Set list of icons
node_data
.append(d => icon_list(17, 35, d));

View File

@ -0,0 +1,78 @@
{% extends "main.html" %}
{% block title %}Details for {{ hostname }} {% endblock %}
{% block scripts %}
<script>
function whereAmI() {
window.opener.ProcessChildMessage("{{ hostname_uuid }}");
};
</script>
{% endblock %}
{% block content %}
<center>
<h3>{{ hostname }}</h3>
<button type="button" class="btn btn-secondary" onclick="whereAmI()">Locate node on tree</button>
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostname_uuid) }}" class="btn btn-info" role="button">Get URLs as text</a>
</center>
<p>Click on the URL to get the content of the response</p>
<ul class="list-group-flush">
{% for url in urls %}
<li class="list-group-item">
<p class="h3">{{ url.name }}</p>
<ul class="list-group">
<li class="list-group-item">
<p class="h4">Response</p>
<div>
{% for key, path in keys_response.items() %}
{% if url[key] %}
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
{%endif%}
{% endfor %}
</div>
{% if not url.empty_response %}
<div>
<a href="{{ url_for('urlnode_details', tree_uuid=tree_uuid, node_uuid=url.uuid) }}">
Download response body.
</a></br>
Body size: {{ sizeof_fmt(url.body.getbuffer().nbytes) }}
</div>
{% else %}
Empty body.
{%endif%}
{% if url.sane_js_details_to_print %}
<div>
{% if url.sane_js_details_to_print is string %}
{{ url.sane_js_details_to_print }}
{% else %}
This file is known as part of <b>{{ url.sane_js_details_to_print[0] }}</b>
version <b>{{ url.sane_js_details_to_print[1] }}</b>: <b>{{ url.sane_js_details_to_print[2] }}</b>.
{% if url.sane_js_details_to_print[3] > 1%}
It is also present in <b>{{ url.sane_js_details_to_print[3] -1 }}</b> other libraries.
{%endif%}
{%endif%}
</div>
{% endif %}
</li>
<li class="list-group-item">
<p class="h4">Request</p>
<div>
{% for key, path in keys_request.items() %}
{% if url[key] %}
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
{%endif%}
{% endfor %}
</div>
{% if url.posted_data %}
<a href="{{ url_for('urlnode_post_request', tree_uuid=tree_uuid, node_uuid=url.uuid) }}">
Download posted data
</a></br>
Posted data size: {{ sizeof_fmt(url.posted_data|length) }}
{% endif %}
</li>
</ul>
</li>
{% endfor %}
</ul>
{% endblock %}

View File

@ -9,6 +9,14 @@
</center>
</br>
<form role="form" action="scrape" method=post enctype=multipart/form-data>
<div class="form-group row">
<div class="col-sm-10">
<div class="form-check">
<input class="form-check-input"type="checkbox" name="listing" checked="true"></input>
<label for="listing" class="form-check-label">Display results on public page</label>
</div>
</div>
</div>
<div class="form-group row">
<label for="url" class="col-sm-2 col-form-label">URL:</label>
<div class="col-sm-10">
@ -33,14 +41,6 @@
</div>
</div>
<div class="form-group row">
<div class="col-sm-10">
<div class="form-check">
<input class="form-check-input"type="checkbox" name="listing" checked="true"></input>
<label for="listing" class="form-check-label">Display on main page</label>
</div>
</div>
</div>
<div class="form-group row">
<label for="cookies" class="col-sm-6">Cookies (JSON export from the Firefox plugin Cookie Quick Manager)</label>
<div class="col-sm-4">
@ -89,7 +89,7 @@
{% endfor%}
{% endfor%}
<div class="dropdown-divider"></div>
<center><button type="submit" class="btn btn-default">Start looking!</button></center>
<center><button type="submit" class="btn btn-primary">Start looking!</button></center>
</form>
</div>
{% endblock %}

View File

@ -4,7 +4,6 @@
<dl class="row">
<dt class="col-sm-3">{{ key }}</dt>
<dd class="col-sm-3">{{ value }}</dd>
</center>
</dl>
{% endfor %}
{% endif%}

View File

@ -40,6 +40,7 @@
{% block content %}
{{super()}}
<script>
var treeUUID = "{{ tree_uuid }}";
var treeData = {{ tree_json | safe }};
</script>