mirror of https://github.com/CIRCL/lookyloo
Merge branch 'new_popup'
commit
3e5a6cb8ab
|
@ -3,10 +3,11 @@
|
|||
"splash_loglevel": "WARNING",
|
||||
"only_global_lookups": true,
|
||||
"splash_url": "http://127.0.0.1:8050",
|
||||
"default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
|
||||
"cache_clean_user": {},
|
||||
"time_delta_on_index": {
|
||||
"weeks": 0,
|
||||
"days": 1,
|
||||
"weeks": 1,
|
||||
"days": 0,
|
||||
"hours": 0
|
||||
},
|
||||
"enable_mail_notification": false,
|
||||
|
@ -23,6 +24,7 @@
|
|||
"splash_loglevel": "(Splash) INFO is *very* verbose.",
|
||||
"only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
|
||||
"splash_url": "URL to connect to splash",
|
||||
"default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA",
|
||||
"cache_clean_user": "Format: {username: password}",
|
||||
"time_delta_on_index": "Time interval of the capture displayed on the index",
|
||||
"enable_mail_notification": "Enable email notification or not",
|
||||
|
|
|
@ -2,5 +2,8 @@
|
|||
"VirusTotal": {
|
||||
"apikey": "KEY",
|
||||
"autosubmit": false
|
||||
},
|
||||
"SaneJS": {
|
||||
"enabled": true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
# AdGraph
|
||||
|
||||
## Implementation
|
||||
|
||||
* https://github.com/uiowa-irl/AdGraph
|
||||
|
||||
4000+ lines of patch on Chromium version 69.0.3441.0 (released 25 May 2018)
|
||||
|
||||
## Paper
|
||||
|
||||
* https://umariqbal.com/papers/adgraph-sp2020.pdf
|
||||
|
||||
## Key points for lookyloo
|
||||
|
||||
### Static, node by node
|
||||
|
||||
* features of the node
|
||||
* keywords in URL
|
||||
* keywords in content
|
||||
* length & parameters of the URL
|
||||
* On image: OCR (?)
|
||||
|
||||
* Domain => blocklists (ublock)
|
||||
|
||||
* Javascript analysis:
|
||||
* eval
|
||||
* specific keywords (tracking, ads, fingerprint...)
|
||||
* specific JS calls (track mouse, scrolling)
|
||||
* Async calls are very often used by ads, recommendation: https://www.iab.com/wp-content/uploads/2017/08/IABNewAdPortfolio_FINAL_2017.pdf
|
||||
* /!\ anything obfuscated is just under the radar
|
||||
|
||||
### Dynamic, based on the tree
|
||||
|
||||
* size
|
||||
* position in the tree
|
||||
* parent features
|
||||
* siblings
|
||||
* number and type of children
|
||||
|
||||
# Other resources
|
||||
|
||||
* Ads standards: https://github.com/InteractiveAdvertisingBureau - https://iabtechlab.com/standards/
|
||||
* Standard API for Ads bidding: https://github.com/prebid/
|
|
@ -20,3 +20,7 @@ class CreateDirectoryException(LookylooException):
|
|||
|
||||
class ConfigError(LookylooException):
    """Lookyloo error type for configuration problems.

    NOTE(review): the raise sites are outside this view; the description is
    taken from the class name only.
    """
|
||||
|
||||
|
||||
class MissingUUID(LookylooException):
    """Raised when a node UUID is requested from a capture whose tree could not be loaded."""
|
||||
|
|
|
@ -18,14 +18,13 @@ from uuid import uuid4
|
|||
from zipfile import ZipFile
|
||||
|
||||
from defang import refang # type: ignore
|
||||
from har2tree import CrawledTree, Har2TreeError, HarFile
|
||||
from pysanejs import SaneJS
|
||||
from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
|
||||
from redis import Redis
|
||||
from scrapysplashwrapper import crawl
|
||||
|
||||
from .exceptions import NoValidHarFile
|
||||
from .exceptions import NoValidHarFile, MissingUUID
|
||||
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template
|
||||
from .modules import VirusTotal
|
||||
from .modules import VirusTotal, SaneJavaScript
|
||||
|
||||
|
||||
class Lookyloo():
|
||||
|
@ -50,17 +49,14 @@ class Lookyloo():
|
|||
self.vt = VirusTotal(self.configs['modules']['VirusTotal'])
|
||||
if not self.vt.available:
|
||||
self.logger.warning('Unable to setup the VirusTotal module')
|
||||
if 'SaneJS' in self.configs['modules']:
|
||||
self.sanejs = SaneJavaScript(self.configs['modules']['SaneJS'])
|
||||
if not self.sanejs.available:
|
||||
self.logger.warning('Unable to setup the SaneJS module')
|
||||
|
||||
if not self.redis.exists('cache_loaded'):
|
||||
self._init_existing_dumps()
|
||||
|
||||
# Try to reach sanejs
|
||||
self.sanejs = SaneJS()
|
||||
if not self.sanejs.is_up:
|
||||
self.use_sane_js = False
|
||||
else:
|
||||
self.use_sane_js = True
|
||||
|
||||
def rebuild_cache(self) -> None:
    """Flush the redis database, then re-run the initialisation of the existing dumps."""
    # Order matters: the database is emptied first, then repopulated.
    self.redis.flushdb()
    self._init_existing_dumps()
|
||||
|
@ -88,6 +84,18 @@ class Lookyloo():
|
|||
sample_config = json.load(_c)
|
||||
return sample_config[entry]
|
||||
|
||||
def get_urlnode_from_tree(self, capture_dir: Path, node_uuid: str) -> URLNode:
    """Return the URL node identified by ``node_uuid`` in the pickled tree of a capture.

    :param capture_dir: directory of the capture, expected to hold ``tree.pickle``
    :param node_uuid: UUID of the URL node to fetch
    :raises MissingUUID: when ``_load_pickle`` returns nothing for the tree pickle
    """
    crawled_tree = self._load_pickle(capture_dir / 'tree.pickle')
    if crawled_tree:
        return crawled_tree.root_hartree.get_url_node_by_uuid(node_uuid)
    raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
|
||||
|
||||
def get_hostnode_from_tree(self, capture_dir: Path, node_uuid: str) -> HostNode:
    """Return the host node identified by ``node_uuid`` in the pickled tree of a capture.

    :param capture_dir: directory of the capture, expected to hold ``tree.pickle``
    :param node_uuid: UUID of the host node to fetch
    :raises MissingUUID: when ``_load_pickle`` returns nothing for the tree pickle
    """
    tree_pickle = capture_dir / 'tree.pickle'
    crawled_tree = self._load_pickle(tree_pickle)
    if not crawled_tree:
        raise MissingUUID(f'Unable to find UUID {node_uuid} in {capture_dir}')
    hartree = crawled_tree.root_hartree
    return hartree.get_host_node_by_uuid(node_uuid)
|
||||
|
||||
def get_statistics(self, capture_dir: Path) -> Dict[str, Any]:
|
||||
# We need the pickle
|
||||
ct = self._load_pickle(capture_dir / 'tree.pickle')
|
||||
|
@ -151,7 +159,7 @@ class Lookyloo():
|
|||
self.redis.hset('lookup_dirs', uuid, str(capture_dir))
|
||||
return
|
||||
|
||||
har = HarFile(har_files[0])
|
||||
har = HarFile(har_files[0], uuid)
|
||||
|
||||
redirects = har.initial_redirects
|
||||
incomplete_redirects = False
|
||||
|
@ -268,7 +276,7 @@ class Lookyloo():
|
|||
except Exception as e:
|
||||
logging.exception(e)
|
||||
|
||||
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, str, Dict[str, str]]:
|
||||
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
|
||||
har_files = sorted(capture_dir.glob('*.har'))
|
||||
pickle_file = capture_dir / 'tree.pickle'
|
||||
try:
|
||||
|
@ -284,7 +292,7 @@ class Lookyloo():
|
|||
ct = CrawledTree(har_files, uuid)
|
||||
with pickle_file.open('wb') as _p:
|
||||
pickle.dump(ct, _p)
|
||||
return str(pickle_file), ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
|
||||
return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
|
||||
except Har2TreeError as e:
|
||||
raise NoValidHarFile(e.message)
|
||||
|
||||
|
@ -312,11 +320,6 @@ class Lookyloo():
|
|||
def get_capture(self, capture_dir: Path) -> BytesIO:
    """Return whatever ``_get_raw`` produces for ``capture_dir`` (no extra arguments passed)."""
    raw_capture = self._get_raw(capture_dir)
    return raw_capture
|
||||
|
||||
def sane_js_query(self, sha512: str) -> Dict[str, Any]:
|
||||
if self.use_sane_js:
|
||||
return self.sanejs.sha512(sha512)
|
||||
return {'response': []}
|
||||
|
||||
def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
|
||||
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
|
||||
perma_uuid: str=None, os: str=None, browser: str=None) -> Union[bool, str]:
|
||||
|
@ -339,7 +342,12 @@ class Lookyloo():
|
|||
return False
|
||||
|
||||
cookies = load_cookies(cookies_pseudofile)
|
||||
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent,
|
||||
if not user_agent:
|
||||
# Catch case where the UA is broken on the UI, and the async submission.
|
||||
ua: str = self.get_config('default_user_agent') # type: ignore
|
||||
else:
|
||||
ua = user_agent
|
||||
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
|
||||
log_enabled=True, log_level=self.get_config('splash_loglevel'))
|
||||
if not items:
|
||||
# broken
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Dict, Any, Optional, List, Union
|
||||
from datetime import date
|
||||
import hashlib
|
||||
import json
|
||||
|
@ -13,6 +13,104 @@ from .helpers import get_homedir
|
|||
from .exceptions import ConfigError
|
||||
|
||||
import vt # type: ignore
|
||||
from pysanejs import SaneJS
|
||||
|
||||
|
||||
class SaneJavaScript():
    """SaneJS lookups with a per-day on-disk cache.

    Known answers are stored as one JSON file per hash in
    ``<homedir>/sanejs/<today>/``; hashes SaneJS does not know are listed,
    one per line, in the ``unknown`` file of the same directory.
    After construction, check ``available`` before calling ``hashes_lookup``:
    ``client`` and ``storage_dir`` are only set when the module is usable.
    """

    # SHA512 digests of generic files (tracking pixels, empty body). They are
    # answered locally with the description below and never sent to SaneJS.
    skip_lookup: Dict[str, str] = {
        "717ea0ff7f3f624c268eccb244e24ec1305ab21557abb3d6f1a7e183ff68a2d28f13d1d2af926c9ef6d1fb16dd8cbe34cd98cacf79091dddc7874dcee21ecfdc": "This is a 1*1 pixel GIF",
        "e508d5d17e94d14b126164082342a9ca4774f404e87a3dd56c26812493ee18d9c3d6daacca979134a94a003066aca24116de874596d00d1e52130c1283d54209": "This is a 1*1 pixel GIF",
        "2d073e10ae40fde434eb31cbedd581a35cd763e51fb7048b88caa5f949b1e6105e37a228c235bc8976e8db58ed22149cfccf83b40ce93a28390566a28975744a": "This is a 1*1 pixel GIF",
        "84e24a70b78e9de9c9d0dfeb49f3f4247dbc1c715d8844471ee40669270682e199d48f5fbec62bd984c9c0270534b407c4d2561dd6c05adec3c83c1534f32d5c": "This is a 1*1 pixel GIF",
        "d5da26b5d496edb0221df1a4057a8b0285d15592a8f8dc7016a294df37ed335f3fde6a2252962e0df38b62847f8b771463a0124ef3f84299f262ed9d9d3cee4c": "This is a 1*1 pixel GIF",
        "f7a5f748f4c0d3096a3ca972886fe9a9dff5dce7792779ec6ffc42fa880b3815e2e4c3bdea452352f3844b81864c9bfb7861f66ac961cfa66cb9cb4febe568e8": "This is a 1*1 pixel GIF",
        "b2ca25a3311dc42942e046eb1a27038b71d689925b7d6b3ebb4d7cd2c7b9a0c7de3d10175790ac060dc3f8acf3c1708c336626be06879097f4d0ecaa7f567041": "This is a 1*1 pixel GIF",
        "b8d82d64ec656c63570b82215564929adad167e61643fd72283b94f3e448ef8ab0ad42202f3537a0da89960bbdc69498608fc6ec89502c6c338b6226c8bf5e14": "This is a 1*1 pixel GIF",
        "2991c3aa1ba61a62c1cccd990c0679a1fb8dccd547d153ec0920b91a75ba20820de1d1c206f66d083bf2585d35050f0a39cd7a3e11c03882dafec907d27a0180": "This is a 1*1 pixel GIF",
        "b1a6cfa7b21dbb0b281d241af609f3ba7f3a63e5668095bba912bf7cfd7f0320baf7c3b0bfabd0f8609448f39902baeb145ba7a2d8177fe22a6fcea03dd29be1": "This is a 1*1 pixel GIF",
        "ebfe0c0df4bcc167d5cb6ebdd379f9083df62bef63a23818e1c6adf0f64b65467ea58b7cd4d03cf0a1b1a2b07fb7b969bf35f25f1f8538cc65cf3eebdf8a0910": "This is a 1*1 pixel GIF",
        "1d68b92e8d822fe82dc7563edd7b37f3418a02a89f1a9f0454cca664c2fc2565235e0d85540ff9be0b20175be3f5b7b4eae1175067465d5cca13486aab4c582c": "This is a 1*1 pixel GIF",
        "ac44da7f455bfae52b883639964276026fb259320902aa813d0333e021c356a7b3e3537b297f9a2158e588c302987ce0854866c039d1bb0ffb27f67560739db2": "This is a 1*1 pixel GIF",
        "921944dc10fbfb6224d69f0b3ac050f4790310fd1bcac3b87c96512ad5ed9a268824f3f5180563d372642071b4704c979d209baf40bc0b1c9a714769aba7dfc7": "This is a 1*1 pixel GIF",
        "89dfc38ec77cf258362e4db7c8203cae8a02c0fe4f99265b0539ec4f810c84f8451e22c9bef1ebc59b4089af7e93e378e053c542a5967ec4912d4c1fc5de22f0": "This is a 1*1 pixel GIF",
        "280ea4383ee6b37051d91c5af30a5ce72aa4439340fc6d31a4fbe7ba8a8156eb7893891d5b2371b9fc4934a78f08de3d57e5b63fa9d279a317dcbefb8a07a6b0": "This is a 1*1 pixel GIF",
        "3844065e1dd778a05e8cc39901fbf3191ded380d594359df137901ec56ca52e03d57eb60acc2421a0ee74f0733bbb5d781b7744685c26fb013a236f49b02fed3": "This is a 1*1 pixel GIF",
        "bd9ab35dde3a5242b04c159187732e13b0a6da50ddcff7015dfb78cdd68743e191eaf5cddedd49bef7d2d5a642c217272a40e5ba603fe24ca676a53f8c417c5d": "This is a 1*1 pixel GIF",
        "d052ecec2839340876eb57247cfc2e777dd7f2e868dc37cd3f3f740c8deb94917a0c9f2a4fc8229987a0b91b04726de2d1e9f6bcbe3f9bef0e4b7e0d7f65ea12": "This is a 1*1 pixel GIF",
        "8717074ddf1198d27b9918132a550cb4ba343794cc3d304a793f9d78c9ff6c4929927b414141d40b6f6ad296725520f4c63edeb660ed530267766c2ab74ee4a9": "This is a 1*1 pixel GIF",
        "6834f1548f26b94357fcc3312a3491e8c87080a84f678f990beb2c745899a01e239964521e64a534d7d5554222f728af966ec6ec8291bc64d2005861bcfd78ec": "This is a 1*1 pixel GIF",
        "3be8176915593e79bc280d08984a16c29c495bc53be9b439276094b8dcd3764a3c72a046106a06b958e08e67451fe02743175c621a1faa261fe7a9691cc77141": "This is a 1*1 pixel GIF",
        "826225fc21717d8861a05b9d2f959539aad2d2b131b2afed75d88fbca535e1b0d5a0da8ac69713a0876a0d467848a37a0a7f926aeafad8cf28201382d16466ab": "This is a 1*1 pixel GIF",
        "202612457d9042fe853daab3ddcc1f0f960c5ffdbe8462fa435713e4d1d85ff0c3f197daf8dba15bda9f5266d7e1f9ecaeee045cbc156a4892d2f931fe6fa1bb": "This is a 1*1 pixel GIF",
        "b82c6aa1ae927ade5fadbbab478cfaef26d21c1ac441f48e69cfc04cdb779b1e46d7668b4368b933213276068e52f9060228907720492a70fd9bc897191ee77c": "This is a 1*1 pixel GIF",
        "763de1053a56a94eef4f72044adb2aa370b98ffa6e0add0b1cead7ee27da519e223921c681ae1db3311273f45d0dd3dc022d102d42ce210c90cb3e761b178438": "This is a 1*1 pixel GIF",
        "69e2da5cdc318fc237eaa243b6ea7ecc83b68dbdea8478dc69154abdda86ecb4e16c35891cc1facb3ce7e0cf19d5abf189c50f59c769777706f4558f6442abbc": "This is a 1*1 pixel GIF",
        "16dd1560fdd43c3eee7bcf622d940be93e7e74dee90286da37992d69cea844130911b97f41c71f8287b54f00bd3a388191112f490470cf27c374d524f49ba516": "This is a 1*1 pixel GIF",
        "01211111688dc2007519ff56603fbe345d057337b911c829aaee97b8d02e7d885e7a2c2d51730f54a04aebc1821897c8041f15e216f1c973ed313087fa91a3fb": "This is a 1*1 pixel GIF",
        "71db01662075fac031dea18b2c766826c77dbab01400a8642cdc7059394841d5df9020076554c3beca6f808187d42e1a1acc98fad9a0e1ad32ae869145f53746": "This is a 1*1 pixel GIF",
        # "": "This is a 1*1 pixel GIF",
        "f1c33e72643ce366fd578e3b5d393799e8c9ea27b180987826af43b4fc00b65a4eaae5e6426a23448956fee99e3108c6a86f32fb4896c156e24af0571a11c498": "This is a 1*1 pixel PNG",
        "dc7c40381b3d22919e32c1b700ccb77b1b0aea2690642d01c1ac802561e135c01d5a4d2a0ea18efc0ec3362e8c549814a10a23563f1f56bd62aee0ced7e2bd99": "This is a 1*1 pixel PNG",
        "c2c239cb5cdd0b670780ad6414ef6be9ccd4c21ce46bb93d1fa3120ac812f1679445162978c3df05cb2e1582a1844cc4c41cf74960b8fdae3123999c5d2176cc": "This is a 1*1 pixel PNG",
        # "": "This is a 1*1 pixel PNG",
        "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e": "This is an empty file"
    }

    def __init__(self, config: Dict[str, Any]):
        """Set up the SaneJS client and the cache directory.

        :param config: module configuration; the module is only enabled when
            the ``enabled`` key is present and truthy.
        """
        # A missing or falsy 'enabled' key disables the module. The previous
        # test ("in ... or ...") never disabled it and raised KeyError when
        # the key was absent.
        if not config.get('enabled'):
            self.available = False
            return
        self.client = SaneJS()
        if not self.client.is_up:
            self.available = False
            return
        self.available = True
        self.storage_dir = get_homedir() / 'sanejs'
        self.storage_dir.mkdir(parents=True, exist_ok=True)

    def hashes_lookup(self, sha512: Union[List[str], str], force: bool=False) -> Dict[str, Any]:
        """Return what is known about one or more SHA512 digests.

        :param sha512: a single digest or a list of digests
        :param force: query SaneJS even for digests already cached today,
            or already recorded as unknown today
        :return: mapping digest -> details. Details are the description string
            from ``skip_lookup`` for generic files, otherwise the ``response``
            payload returned by SaneJS. Digests without an answer are absent.
        """
        if isinstance(sha512, str):
            hashes = [sha512]
        else:
            hashes = sha512

        today_dir = self.storage_dir / date.today().isoformat()
        today_dir.mkdir(parents=True, exist_ok=True)
        sanejs_unknowns = today_dir / 'unknown'
        if sanejs_unknowns.exists():
            with sanejs_unknowns.open() as f:
                unknown_hashes = {line.strip() for line in f}
        else:
            unknown_hashes = set()

        # Match against the normalised list: testing against the raw argument
        # would be a substring test when a single string was passed.
        to_return = {h: self.skip_lookup[h] for h in hashes if h in self.skip_lookup}

        to_lookup = [h for h in hashes if h not in self.skip_lookup]
        if not force:
            to_lookup = [h for h in to_lookup if (h not in unknown_hashes
                                                  and not (today_dir / h).exists())]
        has_new_unknown = False
        for h in to_lookup:
            response = self.client.sha512(h)
            if 'error' in response:
                # Server not ready
                break
            if 'response' in response and response['response']:
                cached_path = today_dir / h
                with cached_path.open('w') as f:
                    json.dump(response['response'], f)
                to_return[h] = response['response']
            elif h not in unknown_hashes:
                unknown_hashes.add(h)
                has_new_unknown = True

        # Complete the answer with what was cached earlier today.
        for h in hashes:
            cached_path = today_dir / h
            if h in unknown_hashes or h in to_return:
                continue
            elif cached_path.exists():
                with cached_path.open() as f:
                    to_return[h] = json.load(f)

        # Persist the unknown digests: the file is read at the top of this
        # method but was never written, so they were queried again every call.
        if has_new_unknown:
            with sanejs_unknowns.open('w') as f:
                f.writelines(f'{h}\n' for h in sorted(unknown_hashes))

        return to_return
|
||||
|
||||
|
||||
class VirusTotal():
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,15 +1,14 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import pickle
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
from io import BytesIO
|
||||
import os
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
|
||||
from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response, flash
|
||||
from flask import Flask, render_template, request, send_file, redirect, url_for, Response, flash
|
||||
from flask_bootstrap import Bootstrap # type: ignore
|
||||
from flask_httpauth import HTTPDigestAuth # type: ignore
|
||||
|
||||
|
@ -18,7 +17,7 @@ from lookyloo.lookyloo import Lookyloo
|
|||
from lookyloo.exceptions import NoValidHarFile
|
||||
from .proxied import ReverseProxied
|
||||
|
||||
from typing import Tuple, Optional, Dict, Any
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
import logging
|
||||
|
||||
|
@ -48,6 +47,19 @@ time_delta_on_index = lookyloo.get_config('time_delta_on_index')
|
|||
logging.basicConfig(level=lookyloo.get_config('loglevel'))
|
||||
|
||||
|
||||
# Method to make sizes in bytes human readable
|
||||
# Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
|
||||
def sizeof_fmt(num, suffix='B'):
    """Format a size as a human readable string using binary (1024-based) prefixes.

    :param num: the size to format
    :param suffix: unit appended after the prefix
    :return: the value with one decimal, e.g. ``1.5KiB``; anything that is
        still >= 1024 after the ``Zi`` step is expressed in ``Yi``.
    """
    binary_prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    position = 0
    while position < len(binary_prefixes):
        if not abs(num) < 1024.0:
            # Too big for this prefix: scale down and try the next one.
            num /= 1024.0
            position += 1
            continue
        return "%3.1f%s%s" % (num, binary_prefixes[position], suffix)
    return "%.1f%s%s" % (num, 'Yi', suffix)
|
||||
|
||||
|
||||
app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)
|
||||
|
||||
|
||||
@auth.get_password
|
||||
def get_pw(username: str) -> Optional[str]:
|
||||
if username in user:
|
||||
|
@ -79,14 +91,6 @@ def rebuild_tree(tree_uuid: str):
|
|||
return redirect(url_for('index'))
|
||||
|
||||
|
||||
# keep
|
||||
def load_tree(capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, Any]]:
|
||||
session.clear()
|
||||
temp_file_name, tree_json, tree_time, tree_ua, tree_root_url, meta = lookyloo.load_tree(capture_dir)
|
||||
session["tree"] = temp_file_name
|
||||
return tree_json, tree_time, tree_ua, tree_root_url, meta
|
||||
|
||||
|
||||
@app.route('/submit', methods=['POST', 'GET'])
|
||||
def submit():
|
||||
to_query = request.get_json(force=True)
|
||||
|
@ -116,11 +120,12 @@ def scrape_web():
|
|||
return render_template('scrape.html', user_agents=user_agents)
|
||||
|
||||
|
||||
@app.route('/tree/hostname/<string:node_uuid>/text', methods=['GET'])
|
||||
def hostnode_details_text(node_uuid: str):
|
||||
with open(session["tree"], 'rb') as f:
|
||||
ct = pickle.load(f)
|
||||
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
|
||||
@app.route('/tree/<string:tree_uuid>/hostname/<string:node_uuid>/text', methods=['GET'])
|
||||
def hostnode_details_text(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
|
||||
urls = []
|
||||
for url in hostnode.urls:
|
||||
urls.append(url.name)
|
||||
|
@ -134,27 +139,85 @@ def hostnode_details_text(node_uuid: str):
|
|||
as_attachment=True, attachment_filename='file.md')
|
||||
|
||||
|
||||
@app.route('/tree/hostname/<string:node_uuid>', methods=['GET'])
|
||||
def hostnode_details(node_uuid: str):
|
||||
with open(session["tree"], 'rb') as f:
|
||||
ct = pickle.load(f)
|
||||
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
|
||||
@app.route('/tree/<string:tree_uuid>/hostname_popup/<string:node_uuid>', methods=['GET'])
|
||||
def hostnode_popup(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
hostnode = lookyloo.get_hostnode_from_tree(capture_dir, node_uuid)
|
||||
keys_response = {
|
||||
'js': "/static/javascript.png",
|
||||
'exe': "/static/exe.png",
|
||||
'css': "/static/css.png",
|
||||
'font': "/static/font.png",
|
||||
'html': "/static/html.png",
|
||||
'json': "/static/json.png",
|
||||
'iframe': "/static/ifr.png",
|
||||
'image': "/static/img.png",
|
||||
'unknown_mimetype': "/static/wtf.png",
|
||||
'video': "/static/video.png",
|
||||
'response_cookie': "/static/cookie_received.png",
|
||||
'redirect': "/static/redirect.png",
|
||||
'redirect_to_nothing': "/static/cookie_in_url.png"
|
||||
}
|
||||
keys_request = {
|
||||
'request_cookie': "/static/cookie_read.png",
|
||||
}
|
||||
|
||||
urls = []
|
||||
if lookyloo.sanejs.available:
|
||||
to_lookup = [url.body_hash for url in hostnode.urls if hasattr(url, 'body_hash')]
|
||||
lookups = lookyloo.sanejs.hashes_lookup(to_lookup)
|
||||
for url in hostnode.urls:
|
||||
if hasattr(url, 'body_hash'):
|
||||
sane_js_r = lookyloo.sane_js_query(url.body_hash)
|
||||
if sane_js_r.get('response'):
|
||||
url.add_feature('sane_js_details', sane_js_r['response'])
|
||||
print('######## SANEJS ##### ', url.sane_js_details)
|
||||
urls.append(url.to_json())
|
||||
return json.dumps(urls)
|
||||
if lookyloo.sanejs.available and hasattr(url, 'body_hash') and url.body_hash in lookups:
|
||||
url.add_feature('sane_js_details', lookups[url.body_hash])
|
||||
if lookups[url.body_hash]:
|
||||
if isinstance(lookups[url.body_hash], list):
|
||||
libname, version, path = lookups[url.body_hash][0].split("|")
|
||||
other_files = len(lookups[url.body_hash])
|
||||
url.add_feature('sane_js_details_to_print', (libname, version, path, other_files))
|
||||
else:
|
||||
# Predefined generic file
|
||||
url.add_feature('sane_js_details_to_print', lookups[url.body_hash])
|
||||
urls.append(url)
|
||||
return render_template('hostname_popup.html',
|
||||
tree_uuid=tree_uuid,
|
||||
hostname_uuid=node_uuid,
|
||||
hostname=hostnode.name,
|
||||
urls=urls,
|
||||
keys_response=keys_response,
|
||||
keys_request=keys_request)
|
||||
|
||||
|
||||
@app.route('/tree/url/<string:node_uuid>', methods=['GET'])
|
||||
def urlnode_details(node_uuid: str):
|
||||
with open(session["tree"], 'rb') as f:
|
||||
ct = pickle.load(f)
|
||||
urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
def urlnode_post_request(tree_uuid: str, node_uuid: str):
    """Send the data posted to a URL node as a plain-text attachment.

    :param tree_uuid: UUID of the capture
    :param node_uuid: UUID of the URL node inside that capture
    :return: a ``posted_data.txt`` attachment, or a 404 response when the
        capture is unknown or the node has no posted data
    """
    capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
    if not capture_dir:
        # A Flask view must not return None (that ends in a 500), so answer
        # with an explicit 404.
        return Response('Unknown capture.', status=404, mimetype='text/plain')
    urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
    if not urlnode.posted_data:
        return Response('No posted data for this URL.', status=404, mimetype='text/plain')
    if isinstance(urlnode.posted_data, (dict, list)):
        # JSON blob, pretty print.
        posted = json.dumps(urlnode.posted_data, indent=2)
    else:
        posted = urlnode.posted_data

    # send_file needs a binary file object: bytes are used as-is, text is encoded.
    payload = posted if isinstance(posted, bytes) else posted.encode()
    to_return = BytesIO(payload)
    to_return.seek(0)
    return send_file(to_return, mimetype='text/plain',
                     as_attachment=True, attachment_filename='posted_data.txt')
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>', methods=['GET'])
|
||||
def urlnode_details(tree_uuid: str, node_uuid: str):
|
||||
capture_dir = lookyloo.lookup_capture_dir(tree_uuid)
|
||||
if not capture_dir:
|
||||
return
|
||||
urlnode = lookyloo.get_urlnode_from_tree(capture_dir, node_uuid)
|
||||
to_return = BytesIO()
|
||||
got_content = False
|
||||
if hasattr(urlnode, 'body'):
|
||||
|
@ -300,7 +363,7 @@ def tree(tree_uuid: str):
|
|||
enable_mail_notification = True
|
||||
else:
|
||||
enable_mail_notification = False
|
||||
tree_json, start_time, user_agent, root_url, meta = load_tree(capture_dir)
|
||||
tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(capture_dir)
|
||||
return render_template('tree.html', tree_json=tree_json, start_time=start_time,
|
||||
user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
|
||||
meta=meta, enable_mail_notification=enable_mail_notification)
|
||||
|
|
|
@ -92,163 +92,22 @@ d3.selection.prototype.moveToBack = function() {
|
|||
});
|
||||
};
|
||||
|
||||
// What happen when clicking on a domain (load a modal display)
|
||||
function hostnode_click(d) {
|
||||
// Move the node to the front (end of the list)
|
||||
var cur_node = d3.select("#node_" + d.data.uuid).moveToFront();
|
||||
// Avoid duplicating overlays
|
||||
cur_node.selectAll('.overlay').remove();
|
||||
// Insert new svg element at this position
|
||||
var overlay_hostname = cur_node.append('g')
|
||||
.attr('class', 'overlay');
|
||||
// Click handler of a hostname: open the details of that node in a separate popup window.
function hostnode_click_popup(d) {
    var popup_url = '/tree/' + treeUUID + '/hostname_popup/' + d.data.uuid;
    var popup_features = 'width=1024,height=768,left=200,top=100';
    window.open(popup_url, '_blank', popup_features);
};
|
||||
|
||||
cur_node.append('line')
|
||||
.attr('id', 'overlay_link')
|
||||
.style("opacity", "0.95")
|
||||
.attr("stroke-width", "2")
|
||||
.style("stroke", "gray");
|
||||
function ProcessChildMessage(message) {
|
||||
var element = document.getElementById("node_" + message);
|
||||
element.scrollIntoView({behavior: "smooth", block: "center", inline: "nearest"});
|
||||
|
||||
var top_margin = 15;
|
||||
var overlay_header_height = 50;
|
||||
var left_margin = 30;
|
||||
|
||||
overlay_hostname
|
||||
.datum({x: 0, y: 0, overlay_uuid: d.data.uuid})
|
||||
.attr('id', 'overlay_' + d.data.uuid)
|
||||
.attr("transform", "translate(" + 10 + "," + 15 + ")")
|
||||
.call(d3.drag().on("drag", function(d, i) {
|
||||
if (typeof d.x === 'undefined') { d.x = 0; } // Any real JS dev would kill me fo that, right?
|
||||
if (typeof d.y === 'undefined') { d.y = 0; } // Maybe even twice.
|
||||
d.x += d3.event.dx
|
||||
d.y += d3.event.dy
|
||||
d3.select(this)
|
||||
.attr("transform", "translate(" + d.x + "," + d.y + ")");
|
||||
cur_node.select('#overlay_link')
|
||||
.attr("x2", d.x + left_margin + 10)
|
||||
.attr("y2", d.y + top_margin + 15);
|
||||
}));
|
||||
|
||||
overlay_hostname.append('rect')
|
||||
.attr("rx", 6)
|
||||
.attr("ry", 6)
|
||||
.attr('x', 15)
|
||||
.attr('y', 10)
|
||||
.style("opacity", "0.95")
|
||||
.attr("stroke", "black")
|
||||
.attr('stroke-opacity', "0.8")
|
||||
.attr("stroke-width", "2")
|
||||
.attr("stroke-linecap", "round")
|
||||
.attr("fill", "white");
|
||||
|
||||
// Modal display
|
||||
var url = "/tree/hostname/" + d.data.uuid;
|
||||
d3.json(url, {credentials: 'same-origin'}).then(urls => {
|
||||
overlay_hostname
|
||||
.append('circle')
|
||||
.attr('id', 'overlay_circle_' + d.data.uuid)
|
||||
.attr('height', overlay_header_height)
|
||||
.attr('cx', left_margin + 10)
|
||||
.attr('cy', top_margin + 15)
|
||||
.attr('r', 2);
|
||||
|
||||
overlay_hostname
|
||||
.append('text')
|
||||
.attr('id', 'overlay_close_' + d.data.uuid)
|
||||
.attr('height', overlay_header_height)
|
||||
.attr('x', left_margin + 500) // Value updated based on the size of the rectangle max: max_overlay_width
|
||||
.attr('y', top_margin + 25)
|
||||
.style("font-size", '30px')
|
||||
.text('\u2716')
|
||||
.attr('cursor', 'pointer')
|
||||
.on("click", () => {
|
||||
main_svg.selectAll('#overlay_' + d.data.uuid).remove();
|
||||
cur_node.select('#overlay_link').remove();
|
||||
}
|
||||
);
|
||||
|
||||
overlay_hostname.append('line')
|
||||
.attr('id', 'overlay_separator_header' + d.data.uuid)
|
||||
.style("stroke", "black")
|
||||
.style('stroke-width', "1px")
|
||||
.attr('x1', 20)
|
||||
.attr('y1', overlay_header_height)
|
||||
.attr('x2', 500)
|
||||
.attr('y2', overlay_header_height);
|
||||
|
||||
var url_entries = overlay_hostname.append('svg');
|
||||
|
||||
var interval_entries = 10;
|
||||
urls.forEach((url, index, array) => {
|
||||
var jdata = JSON.parse(url);
|
||||
var url_data = url_entries.append('svg')
|
||||
.attr('class', 'url_data');
|
||||
url_data.datum({'data': jdata});
|
||||
url_data.append(d => text_entry(left_margin, top_margin + overlay_header_height + (interval_entries * index), urlnode_click, d));
|
||||
url_data.append(d => icon_list(left_margin + 5, top_margin + 20 + overlay_header_height + (interval_entries * index), d, url_view=true));
|
||||
});
|
||||
|
||||
var cur_url_data_height = 0;
|
||||
url_entries.selectAll('.url_data').each(function(p, j){
|
||||
d3.select(this).attr('y', cur_url_data_height);
|
||||
cur_url_data_height += d3.select(this).node().getBBox().height;
|
||||
var cur_icon_list_len = 0;
|
||||
// set position of icons based of their length
|
||||
d3.select(this).selectAll('.icon').each(function(p, j){
|
||||
d3.select(this).attr('x', cur_icon_list_len);
|
||||
cur_icon_list_len += d3.select(this).node().getBBox().width;
|
||||
});
|
||||
});
|
||||
|
||||
var overlay_bbox = overlay_hostname.node().getBBox()
|
||||
overlay_hostname.append('line')
|
||||
.attr('id', 'overlay_separator_footer' + d.data.uuid)
|
||||
.style("stroke", "black")
|
||||
.style('stroke-width', "1px")
|
||||
.attr('x1', 20)
|
||||
.attr('y1', overlay_bbox.height + 20)
|
||||
.attr('x2', 500)
|
||||
.attr('y2', overlay_bbox.height + 20);
|
||||
|
||||
var overlay_bbox = overlay_hostname.node().getBBox()
|
||||
overlay_hostname
|
||||
.append('text')
|
||||
.attr('id', 'overlay_download_' + d.data.uuid)
|
||||
.attr('height', overlay_header_height - 10)
|
||||
.attr('x', left_margin)
|
||||
.attr('y', overlay_bbox.height + overlay_header_height)
|
||||
.style("font-size", '20px')
|
||||
.text('Download URLs as text')
|
||||
.attr('cursor', 'pointer')
|
||||
.on("click", () => {
|
||||
var url = "/tree/hostname/" + d.data.uuid + '/text';
|
||||
d3.blob(url, {credentials: 'same-origin'}).then(data => {
|
||||
saveAs(data, 'file.md');
|
||||
});
|
||||
});
|
||||
|
||||
var overlay_bbox = overlay_hostname.node().getBBox();
|
||||
overlay_hostname.select('rect')
|
||||
.attr('width', () => {
|
||||
optimal_size = overlay_bbox.width + left_margin
|
||||
return optimal_size < max_overlay_width ? optimal_size : max_overlay_width;
|
||||
})
|
||||
.attr('height', overlay_bbox.height + overlay_header_height);
|
||||
|
||||
overlay_hostname.select('#overlay_close_' + d.data.uuid)
|
||||
.attr('x', overlay_hostname.select('rect').node().getBBox().width - 20);
|
||||
|
||||
overlay_hostname.select('#overlay_separator_header' + d.data.uuid)
|
||||
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 10);
|
||||
overlay_hostname.select('#overlay_separator_footer' + d.data.uuid)
|
||||
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 10);
|
||||
|
||||
|
||||
cur_node.select('#overlay_link')
|
||||
.attr("x1", 10)
|
||||
.attr("y1", 0)
|
||||
.attr("x2", left_margin + 3)
|
||||
.attr("y2", top_margin + 7);
|
||||
});
|
||||
var to_blink = d3.select("#node_" + message).select('text');
|
||||
to_blink
|
||||
.transition().duration(500) //Set transition
|
||||
.style('fill', 'red')
|
||||
.style('font-size', '20px')
|
||||
.transition().duration(500) //Set transition
|
||||
.style('fill', 'black')
|
||||
.style('font-size', '16px');
|
||||
};
|
||||
|
||||
function icon(key, icon_path, d, icon_size){
|
||||
|
@ -483,7 +342,7 @@ function update(root, computed_node_width=0) {
|
|||
|
||||
// Set Hostname text
|
||||
node_data
|
||||
.append(d => text_entry(15, 5, hostnode_click, d));
|
||||
.append(d => text_entry(15, 5, hostnode_click_popup, d)); // Popup
|
||||
// Set list of icons
|
||||
node_data
|
||||
.append(d => icon_list(17, 35, d));
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
{% extends "main.html" %}
|
||||
|
||||
{% block title %}Details for {{ hostname }} {% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script>
|
||||
// Ask the window that opened this page to locate this host's node on the tree
// (wired to the "Locate node on tree" button).
function whereAmI() {
  // window.opener is null when this page was loaded directly rather than
  // opened from the tree view, and the opener may have been closed or
  // navigated away since; calling into it blindly would throw a TypeError.
  var parentWindow = window.opener;
  if (!parentWindow || parentWindow.closed ||
      typeof parentWindow.ProcessChildMessage !== 'function') {
    console.warn('Unable to locate the node: the tree window is not available.');
    return;
  }
  parentWindow.ProcessChildMessage("{{ hostname_uuid }}");
};
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<center>
|
||||
<h3>{{ hostname }}</h3>
|
||||
<button type="button" class="btn btn-secondary" onclick="whereAmI()">Locate node on tree</button>
|
||||
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostname_uuid) }}" class="btn btn-info" role="button">Get URLs as text</a>
|
||||
</center>
|
||||
<p>Click on the URL to get the content of the response</p>
|
||||
<ul class="list-group-flush">
|
||||
{% for url in urls %}
|
||||
<li class="list-group-item">
|
||||
<p class="h3">{{ url.name }}</p>
|
||||
<ul class="list-group">
|
||||
<li class="list-group-item">
|
||||
<p class="h4">Response</p>
|
||||
<div>
|
||||
{% for key, path in keys_response.items() %}
|
||||
{% if url[key] %}
|
||||
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
|
||||
{%endif%}
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% if not url.empty_response %}
|
||||
<div>
|
||||
<a href="{{ url_for('urlnode_details', tree_uuid=tree_uuid, node_uuid=url.uuid) }}">
|
||||
Download response body.
|
||||
</a><br>
|
||||
Body size: {{ sizeof_fmt(url.body.getbuffer().nbytes) }}
|
||||
</div>
|
||||
{% else %}
|
||||
Empty body.
|
||||
{%endif%}
|
||||
{% if url.sane_js_details_to_print %}
|
||||
<div>
|
||||
{% if url.sane_js_details_to_print is string %}
|
||||
{{ url.sane_js_details_to_print }}
|
||||
{% else %}
|
||||
This file is known as part of <b>{{ url.sane_js_details_to_print[0] }}</b>
|
||||
version <b>{{ url.sane_js_details_to_print[1] }}</b>: <b>{{ url.sane_js_details_to_print[2] }}</b>.
|
||||
{% if url.sane_js_details_to_print[3] > 1%}
|
||||
It is also present in <b>{{ url.sane_js_details_to_print[3] -1 }}</b> other libraries.
|
||||
{%endif%}
|
||||
{%endif%}
|
||||
</div>
|
||||
{% endif %}
|
||||
</li>
|
||||
<li class="list-group-item">
|
||||
<p class="h4">Request</p>
|
||||
<div>
|
||||
{% for key, path in keys_request.items() %}
|
||||
{% if url[key] %}
|
||||
<img src="{{ path }}" alt="{{ key }}" width="21" height="21"/>
|
||||
{%endif%}
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% if url.posted_data %}
|
||||
<a href="{{ url_for('urlnode_post_request', tree_uuid=tree_uuid, node_uuid=url.uuid) }}">
|
||||
Download posted data
|
||||
</a><br>
|
||||
Posted data size: {{ sizeof_fmt(url.posted_data|length) }}
|
||||
{% endif %}
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endblock %}
|
|
@ -9,6 +9,14 @@
|
|||
</center>
|
||||
<br>
|
||||
<form role="form" action="scrape" method=post enctype=multipart/form-data>
|
||||
<div class="form-group row">
|
||||
<div class="col-sm-10">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" id="listing" name="listing" checked="true">
|
||||
<label for="listing" class="form-check-label">Display results on public page</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group row">
|
||||
<label for="url" class="col-sm-2 col-form-label">URL:</label>
|
||||
<div class="col-sm-10">
|
||||
|
@ -33,14 +41,6 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-group row">
|
||||
<div class="col-sm-10">
|
||||
<div class="form-check">
|
||||
<input class="form-check-input"type="checkbox" name="listing" checked="true"></input>
|
||||
<label for="listing" class="form-check-label">Display on main page</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group row">
|
||||
<label for="cookies" class="col-sm-6">Cookies (JSON export from the Firefox plugin Cookie Quick Manager)</label>
|
||||
<div class="col-sm-4">
|
||||
|
@ -89,7 +89,7 @@
|
|||
{% endfor%}
|
||||
{% endfor%}
|
||||
<div class="dropdown-divider"></div>
|
||||
<center><button type="submit" class="btn btn-default">Start looking!</button></center>
|
||||
<center><button type="submit" class="btn btn-primary">Start looking!</button></center>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
<dl class="row">
|
||||
<dt class="col-sm-3">{{ key }}</dt>
|
||||
<dd class="col-sm-3">{{ value }}</dd>
|
||||
</center>
|
||||
</dl>
|
||||
{% endfor %}
|
||||
{% endif%}
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
{% block content %}
|
||||
{{super()}}
|
||||
<script>
|
||||
var treeUUID = "{{ tree_uuid }}";
|
||||
var treeData = {{ tree_json | safe }};
|
||||
</script>
|
||||
|
||||
|
|
Loading…
Reference in New Issue