diff --git a/config/modules.json.sample b/config/modules.json.sample
index e3c1365..6309016 100644
--- a/config/modules.json.sample
+++ b/config/modules.json.sample
@@ -1,14 +1,17 @@
 {
   "VirusTotal": {
     "apikey": null,
-    "autosubmit": false
+    "autosubmit": false,
+    "allow_auto_trigger": false
   },
   "PhishingInitiative": {
     "apikey": null,
-    "autosubmit": false
+    "autosubmit": false,
+    "allow_auto_trigger": false
   },
   "SaneJS": {
-    "enabled": true
+    "enabled": true,
+    "allow_auto_trigger": true
   },
   "MISP": {
     "apikey": null,
@@ -18,16 +21,19 @@
     "enable_lookup": false,
     "enable_push": false,
     "default_tags": [],
-    "auto_publish": false
+    "auto_publish": false,
+    "allow_auto_trigger": false
   },
   "UniversalWhois": {
     "enabled": false,
     "ipaddress": "127.0.0.1",
-    "port": 4243
+    "port": 4243,
+    "allow_auto_trigger": true
   },
   "_notes": {
     "apikey": "null disables the module. Pass a string otherwise.",
     "autosubmit": "Automatically submits the URL to the 3rd party service.",
+    "allow_auto_trigger": "Allow auto trigger per module: some (i.e. VT) can be very expensive",
     "VirusTotal": "Module to query Virustotal: https://www.virustotal.com/",
     "PhishingInitiative": "Module to query phishing initiative: https://phishing-initiative.fr/contrib/",
     "SaneJS": "Module to query SaneJS: https://github.com/Lookyloo/sanejs",
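
The allow_auto_trigger key is the only new setting: the expensive or rate-limited services (VirusTotal, Phishing Initiative, MISP) default to false, while the cheap local lookups (SaneJS, UniversalWhois) default to true. A minimal sketch, assuming a modules.json copied from this sample (the path is a placeholder), that lists the per-module flag:

    import json
    from pathlib import Path

    # Hypothetical location; adjust to wherever the deployed modules.json lives.
    with (Path('config') / 'modules.json').open() as f:
        modules = json.load(f)

    for name, settings in modules.items():
        if name == '_notes':
            continue
        print(f"{name}: auto trigger allowed = {settings.get('allow_auto_trigger', False)}")
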
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index 05349b3..1156e26 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -155,6 +155,7 @@ class Lookyloo():
         ct = CrawledTree(har_files, capture_uuid)
         self._ensure_meta(capture_dir, ct)
         self._resolve_dns(ct)
+        self.context.contextualize_tree(ct)
         # Force update cache of the capture (takes care of the incomplete redirect key)
         self._set_capture_cache(capture_dir, force=True)
         cache = self.capture_cache(capture_uuid)
@@ -353,7 +354,7 @@ class Lookyloo():
         with (capture_dir / 'categories').open('w') as f:
             f.writelines(f'{t}\n' for t in current_categories)

-    def trigger_modules(self, capture_uuid: str, /, force: bool=False) -> None:
+    def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> None:
         '''Launch the 3rd party modules on a capture.
         It uses the cached result *if* the module was triggered the same day.
         The `force` flag re-triggers the module regardless of the cache.'''
@@ -363,19 +364,9 @@ class Lookyloo():
             self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_uuid}) is cached.')
             return

-        if self.pi.available:
-            if ct.redirects:
-                for redirect in ct.redirects:
-                    self.pi.url_lookup(redirect, force)
-            else:
-                self.pi.url_lookup(ct.root_hartree.har.root_url, force)
-
-        if self.vt.available:
-            if ct.redirects:
-                for redirect in ct.redirects:
-                    self.vt.url_lookup(redirect, force)
-            else:
-                self.vt.url_lookup(ct.root_hartree.har.root_url, force)
+        self.pi.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
+        self.vt.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
+        self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)

     def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
         '''Get the responses of the modules from the cached responses on the disk'''
@@ -1178,6 +1169,7 @@ class Lookyloo():
             raise MissingUUID(f'Unable to find UUID {node_uuid} in {node_uuid}')

         known_content = self.context.find_known_content(hostnode)
+        self.uwhois.query_whois_hostnode(hostnode)

         urls: List[Dict[str, Any]] = []
         for url in hostnode.urls:
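
trigger_modules() now delegates the per-module logic to each module's capture_default_trigger() and gains an auto_trigger flag so background code can identify itself, letting modules opt out of automatic runs. A rough usage sketch, assuming an initialised Lookyloo instance and a cached capture (the UUID is a placeholder):

    from lookyloo.lookyloo import Lookyloo

    lookyloo = Lookyloo()
    capture_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder UUID

    # Automatic path: modules configured with allow_auto_trigger = false are skipped.
    lookyloo.trigger_modules(capture_uuid, auto_trigger=True)

    # Explicit user request: runs every available module and bypasses the daily cache.
    lookyloo.trigger_modules(capture_uuid, force=True)
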
diff --git a/lookyloo/modules.py b/lookyloo/modules.py
index 51c0a1e..8247555 100644
--- a/lookyloo/modules.py
+++ b/lookyloo/modules.py
@@ -19,6 +19,8 @@ from pysanejs import SaneJS
 from pyeupi import PyEUPI
 from pymisp import PyMISP, MISPEvent, MISPAttribute

+from har2tree import CrawledTree, HostNode
+

 class MISP():
@@ -33,6 +35,7 @@ class MISP():
         self.available = True
         self.enable_lookup = False
         self.enable_push = False
+        self.allow_auto_trigger = False
         try:
             self.client = PyMISP(url=config['url'], key=config['apikey'],
                                  ssl=config['verify_tls_cert'], timeout=config['timeout'])
@@ -45,6 +48,8 @@ class MISP():
             self.enable_lookup = True
         if config.get('enable_push'):
             self.enable_push = True
+        if config.get('allow_auto_trigger'):
+            self.allow_auto_trigger = True
         self.default_tags: List[str] = config.get('default_tags')  # type: ignore
         self.auto_publish = config.get('auto_publish')
         self.storage_dir_misp = get_homedir() / 'misp'
@@ -126,6 +131,10 @@ class UniversalWhois():
             return
         self.server = config.get('ipaddress')
         self.port = config.get('port')
+        self.allow_auto_trigger = False
+        if config.get('allow_auto_trigger'):
+            self.allow_auto_trigger = True
+
         try:
             with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                 sock.connect((self.server, self.port))
@@ -135,6 +144,27 @@ class UniversalWhois():
             return
         self.available = True

+    def query_whois_hostnode(self, hostnode: HostNode) -> None:
+        if hasattr(hostnode, 'resolved_ips'):
+            for ip in hostnode.resolved_ips:
+                self.whois(ip)
+        if hasattr(hostnode, 'cnames'):
+            for cname in hostnode.cnames:
+                self.whois(cname)
+        self.whois(hostnode.name)
+
+    def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
+        '''Run the module on all the nodes up to the final redirect'''
+        if not self.available:
+            return None
+        if auto_trigger and not self.allow_auto_trigger:
+            return None
+
+        hostnode = crawled_tree.root_hartree.get_host_node_by_uuid(crawled_tree.root_hartree.rendered_node.hostnode_uuid)
+        self.query_whois_hostnode(hostnode)
+        for n in hostnode.get_ancestors():
+            self.query_whois_hostnode(n)
+
     def whois(self, query: str) -> str:
         bytes_whois = b''
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
@@ -163,6 +193,9 @@ class SaneJavaScript():
             self.available = False
             return
         self.available = True
+        self.allow_auto_trigger = False
+        if config.get('allow_auto_trigger'):
+            self.allow_auto_trigger = True
         self.storage_dir = get_homedir() / 'sanejs'
         self.storage_dir.mkdir(parents=True, exist_ok=True)
@@ -231,9 +264,15 @@ class PhishingInitiative():
         self.available = True
         self.autosubmit = False
+        self.allow_auto_trigger = False
         self.client = PyEUPI(config['apikey'])
+
+        if config.get('allow_auto_trigger'):
+            self.allow_auto_trigger = True
+
         if config.get('autosubmit'):
             self.autosubmit = True
+
         self.storage_dir_eupi = get_homedir() / 'eupi'
         self.storage_dir_eupi.mkdir(parents=True, exist_ok=True)
@@ -253,6 +292,19 @@ class PhishingInitiative():
             with cached_entries[0].open() as f:
                 return json.load(f)

+    def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
+        '''Run the module on all the nodes up to the final redirect'''
+        if not self.available:
+            return None
+        if auto_trigger and not self.allow_auto_trigger:
+            return None
+
+        if crawled_tree.redirects:
+            for redirect in crawled_tree.redirects:
+                self.url_lookup(redirect, force)
+        else:
+            self.url_lookup(crawled_tree.root_hartree.har.root_url, force)
+
     def url_lookup(self, url: str, force: bool=False) -> None:
         '''Lookup an URL on Phishing Initiative
         Note: force means 2 things:
@@ -304,9 +356,15 @@ class VirusTotal():
         self.available = True
         self.autosubmit = False
+        self.allow_auto_trigger = False
         self.client = vt.Client(config['apikey'])
+
+        if config.get('allow_auto_trigger'):
+            self.allow_auto_trigger = True
+
         if config.get('autosubmit'):
             self.autosubmit = True
+
         self.storage_dir_vt = get_homedir() / 'vt_url'
         self.storage_dir_vt.mkdir(parents=True, exist_ok=True)
@@ -327,6 +385,19 @@ class VirusTotal():
             with cached_entries[0].open() as f:
                 return json.load(f)

+    def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
+        '''Run the module on all the nodes up to the final redirect'''
+        if not self.available:
+            return None
+        if auto_trigger and not self.allow_auto_trigger:
+            return None
+
+        if crawled_tree.redirects:
+            for redirect in crawled_tree.redirects:
+                self.url_lookup(redirect, force)
+        else:
+            self.url_lookup(crawled_tree.root_hartree.har.root_url, force)
+
     def url_lookup(self, url: str, force: bool=False) -> None:
         '''Lookup an URL on VT
         Note: force means 2 things:
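
Every module now shares the same two-step gate in capture_default_trigger(): bail out if the module is unavailable, and drop automatic triggers unless the operator opted in via allow_auto_trigger. The sketch below is a hypothetical module (the class name, config keys and url_lookup body are illustrative only, not part of this change) showing the contract a future module would be expected to follow:

    from har2tree import CrawledTree


    class ExampleModule():

        def __init__(self, config: dict):
            # Both keys are assumptions mirroring how the existing modules read their config.
            self.available = bool(config.get('apikey'))
            self.allow_auto_trigger = bool(config.get('allow_auto_trigger'))

        def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
            '''Run the module on all the nodes up to the final redirect'''
            if not self.available:
                return None
            if auto_trigger and not self.allow_auto_trigger:
                return None
            for url in crawled_tree.redirects or [crawled_tree.root_hartree.har.root_url]:
                self.url_lookup(url, force)

        def url_lookup(self, url: str, force: bool=False) -> None:
            ...  # query the third-party service, honouring the daily cache unless force is set
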
diff --git a/website/web/__init__.py b/website/web/__init__.py
index acf1e0c..c59c354 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -284,10 +284,11 @@ def rebuild_tree(tree_uuid: str):
     return redirect(url_for('index'))


-@app.route('/tree/<string:tree_uuid>/trigger_modules/', defaults={'force': False})
-@app.route('/tree/<string:tree_uuid>/trigger_modules/<int:force>', methods=['GET'])
-def trigger_modules(tree_uuid: str, force: int):
-    lookyloo.trigger_modules(tree_uuid, True if force else False)
+@app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
+def trigger_modules(tree_uuid: str):
+    force = True if request.args.get('force') else False
+    auto_trigger = True if request.args.get('auto_trigger') else False
+    lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)
     return redirect(url_for('modules', tree_uuid=tree_uuid))


@@ -512,7 +513,6 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
     try:
         ct = lookyloo.get_crawled_tree(tree_uuid)
-        ct = lookyloo.context.contextualize_tree(ct)
         b64_thumbnail = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=True)
         screenshot_size = lookyloo.get_screenshot(tree_uuid).getbuffer().nbytes
         meta = lookyloo.get_meta(tree_uuid)
diff --git a/website/web/templates/tree.html b/website/web/templates/tree.html
index 57a621e..7a306c4 100644
--- a/website/web/templates/tree.html
+++ b/website/web/templates/tree.html
@@ -96,7 +96,7 @@
 {% if auto_trigger_modules %}
 {% endif%}
 {% endblock %}
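
On the web side, force is no longer a path segment: both flags now travel as query-string parameters, and the handler only checks for their presence, so any non-empty value counts as true. A sketch of calling the reworked endpoint, assuming a reachable Lookyloo instance (the base URL and UUID are placeholders):

    import requests

    base_url = 'https://lookyloo.example.org'               # hypothetical instance
    tree_uuid = '00000000-0000-0000-0000-000000000000'      # placeholder capture UUID

    # Both parameters are optional; omit auto_trigger for a normal user-initiated run.
    requests.get(f'{base_url}/tree/{tree_uuid}/trigger_modules',
                 params={'force': 1, 'auto_trigger': 1})
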