From cefb3fddbfc3ca17d1ca4edffe463a9423c53345 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Thu, 16 Sep 2021 16:33:44 +0200 Subject: [PATCH] new: Phishtank lookup. --- config/modules.json.sample | 8 ++- lookyloo/lookyloo.py | 14 ++++- lookyloo/modules/__init__.py | 3 +- lookyloo/modules/phishtank.py | 90 ++++++++++++++++++++++++++++++ poetry.lock | 20 ++++++- pyproject.toml | 1 + website/web/__init__.py | 12 +++- website/web/templates/modules.html | 14 +++++ 8 files changed, 157 insertions(+), 5 deletions(-) create mode 100644 lookyloo/modules/phishtank.py diff --git a/config/modules.json.sample b/config/modules.json.sample index 755061f..6d815fb 100644 --- a/config/modules.json.sample +++ b/config/modules.json.sample @@ -36,6 +36,11 @@ "allow_auto_trigger": false, "force_visibility": false }, + "Phishtank": { + "enabled": false, + "url": null, + "allow_auto_trigger": true + }, "_notes": { "apikey": "null disables the module. Pass a string otherwise.", "autosubmit": "Automatically submits the URL to the 3rd party service.", @@ -45,6 +50,7 @@ "SaneJS": "Module to query SaneJS: https://github.com/Lookyloo/sanejs", "MISP": "Module to query MISP: https://www.misp-project.org/", "UniversalWhois": "Module to query a local instance of uWhoisd: https://github.com/Lookyloo/uwhoisd", - "UrlScan": "Module to query urlscan.io" + "UrlScan": "Module to query urlscan.io", + "Phishtank": "Module to query Phishtank Lookup (https://github.com/Lookyloo/phishtank-lookup). URL set to none means querying the public instance." 
} } diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index dafc3ba..398dd24 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -40,7 +40,7 @@ from .helpers import (CaptureStatus, get_captures_dir, get_config, uniq_domains) from .indexing import Indexing from .modules import (MISP, PhishingInitiative, SaneJavaScript, UniversalWhois, - UrlScan, VirusTotal) + UrlScan, VirusTotal, Phishtank) class Lookyloo(): @@ -85,6 +85,10 @@ class Lookyloo(): if not self.urlscan.available: self.logger.warning('Unable to setup the UrlScan module') + self.phishtank = Phishtank(get_config('modules', 'Phishtank')) + if not self.phishtank.available: + self.logger.warning('Unable to setup the Phishtank module') + self.context = Context(self.sanejs) self._captures_index: Dict[str, CaptureCache] = {} @@ -473,6 +477,7 @@ class Lookyloo(): self.get_info(capture_uuid), visibility='unlisted' if (capture_cache and capture_cache.no_index) else 'public', force=force, auto_trigger=auto_trigger) + to_return['Phishtank'] = self.phishtank.capture_default_trigger(ct, auto_trigger=auto_trigger) return to_return def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]: @@ -497,6 +502,13 @@ class Lookyloo(): to_return['pi'][redirect] = self.pi.get_url_lookup(redirect) else: to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url) + if self.phishtank.available: + to_return['phishtank'] = {} + if ct.redirects: + for redirect in ct.redirects: + to_return['phishtank'][redirect] = self.phishtank.get_url_lookup(redirect) + else: + to_return['phishtank'][ct.root_hartree.har.root_url] = self.phishtank.get_url_lookup(ct.root_hartree.har.root_url) if self.urlscan.available: info = self.get_info(capture_uuid) to_return['urlscan'] = {'submission': {}, 'result': {}} diff --git a/lookyloo/modules/__init__.py b/lookyloo/modules/__init__.py index e19ff6f..42830c6 100644 --- a/lookyloo/modules/__init__.py +++ 
b/lookyloo/modules/__init__.py @@ -6,4 +6,5 @@ from .pi import PhishingInitiative # noqa from .sanejs import SaneJavaScript # noqa from .urlscan import UrlScan # noqa from .uwhois import UniversalWhois # noqa -from.vt import VirusTotal # noqa +from .vt import VirusTotal # noqa +from .phishtank import Phishtank # noqa diff --git a/lookyloo/modules/phishtank.py b/lookyloo/modules/phishtank.py new file mode 100644 index 0000000..7071977 --- /dev/null +++ b/lookyloo/modules/phishtank.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import hashlib +import json +from datetime import date, datetime, timedelta, timezone +from pathlib import Path +from typing import Any, Dict, Optional + +from har2tree import CrawledTree +from pyphishtanklookup import PhishtankLookup + +from ..exceptions import ConfigError +from ..helpers import get_homedir + +# Note: stop doing requests 70 hours after the capture was initially done (see capture_default_trigger). + + +class Phishtank(): + + def __init__(self, config: Dict[str, Any]): + if not config.get('enabled'): + self.available = False + return + + self.available = True + self.allow_auto_trigger = False + if config.get('url'): + self.client = PhishtankLookup(config['url']) + else: + self.client = PhishtankLookup() + + if config.get('allow_auto_trigger'): + self.allow_auto_trigger = True + + self.storage_dir_pt = get_homedir() / 'phishtank' + self.storage_dir_pt.mkdir(parents=True, exist_ok=True) + + def __get_cache_directory(self, url: str) -> Path: + m = hashlib.md5() + m.update(url.encode()) + return self.storage_dir_pt / m.hexdigest() + + def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]: + url_storage_dir = self.__get_cache_directory(url) + if not url_storage_dir.exists(): + return None + cached_entries = sorted(url_storage_dir.glob('*'), reverse=True) + if not cached_entries: + return None + + with cached_entries[0].open() as f: + return json.load(f) + + def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, 
auto_trigger: bool=False) -> Dict: + '''Run the module on all the nodes up to the final redirect''' + if not self.available: + return {'error': 'Module not available'} + if auto_trigger and not self.allow_auto_trigger: + return {'error': 'Auto trigger not allowed on module'} + + # Quit if the capture is more than 70h old: the data in Phishtank expires around that time. + if crawled_tree.start_time <= datetime.now(timezone.utc) - timedelta(hours=70): + return {'error': 'Capture to old, the response will be irrelevant.'} + + if crawled_tree.redirects: + for redirect in crawled_tree.redirects: + self.url_lookup(redirect) + else: + self.url_lookup(crawled_tree.root_hartree.har.root_url) + return {'success': 'Module triggered'} + + def url_lookup(self, url: str) -> None: + '''Look up a URL on Phishtank Lookup and cache the response on disk when there is a hit. + Note: It will trigger a request to phishtank every time *until* there is a hit (it's cheap), then once a day. + ''' + if not self.available: + raise ConfigError('Phishtank not available, probably not enabled.') + + url_storage_dir = self.__get_cache_directory(url) + url_storage_dir.mkdir(parents=True, exist_ok=True) + pt_file = url_storage_dir / date.today().isoformat() + + if pt_file.exists(): + return + + url_information = self.client.get_url_entry(url) + if url_information: + with pt_file.open('w') as _f: + json.dump(url_information, _f) diff --git a/poetry.lock b/poetry.lock index 4509545..e751e28 100644 --- a/poetry.lock +++ b/poetry.lock @@ -857,6 +857,20 @@ six = ">=1.5.2" docs = ["sphinx", "sphinx-rtd-theme"] test = ["flaky", "pretend", "pytest (>=3.0.1)"] +[[package]] +name = "pyphishtanklookup" +version = "1.0.1" +description = "Python CLI and module for PhishtankLookup" +category = "main" +optional = false +python-versions = ">=3.8,<4.0" + +[package.dependencies] +requests = ">=2.26.0,<3.0.0" + +[package.extras] +docs = ["Sphinx (>=4.2,<5.0)"] + [[package]] name = "pypydispatcher" version = "2.1.2" @@ -1303,7 +1317,7 @@ misp = ["python-magic", "pydeep"] 
[metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "06884ac13bf1010128d260e6ae1c1b0643690685e21149bc667cc71abb342ad1" +content-hash = "ebcc6948a4459a9c998c5ecefce83e003cd720c7c498467aa35f510d49815c23" [metadata.files] aiohttp = [ @@ -1984,6 +1998,10 @@ pyopenssl = [ {file = "pyOpenSSL-20.0.1-py2.py3-none-any.whl", hash = "sha256:818ae18e06922c066f777a33f1fca45786d85edfe71cd043de6379337a7f274b"}, {file = "pyOpenSSL-20.0.1.tar.gz", hash = "sha256:4c231c759543ba02560fcd2480c48dcec4dae34c9da7d3747c508227e0624b51"}, ] +pyphishtanklookup = [ + {file = "pyphishtanklookup-1.0.1-py3-none-any.whl", hash = "sha256:f000c088255b2d18b166c80b51457e13b1dea30538d1d1943344d70ddc711b41"}, + {file = "pyphishtanklookup-1.0.1.tar.gz", hash = "sha256:509e4134f9f8ef843987f9a32bed872cf856c15d838d1be434485dc063ee0dc6"}, +] pypydispatcher = [ {file = "PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2"}, ] diff --git a/pyproject.toml b/pyproject.toml index f2c5333..36ce45c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ Flask-Login = "^0.5.0" flask-restx = "^0.5.1" hiredis = "^2.0.0" rich = "^10.9.0" +pyphishtanklookup = "^1.0.1" [tool.poetry.extras] misp = ['python-magic', 'pydeep'] diff --git a/website/web/__init__.py b/website/web/__init__.py index 6d64ec2..fec50a8 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -395,6 +395,14 @@ def modules(tree_uuid: str): continue pi_short_result[url] = full_report['results'][0]['tag_label'] + phishtank_short_result: Dict[str, str] = {} + if 'phishtank' in modules_responses: + pt = modules_responses.pop('phishtank') + for url, full_report in pt.items(): + if not full_report: + continue + phishtank_short_result[url] = full_report['phish_detail_url'] + urlscan_to_display: Dict = {} if 'urlscan' in modules_responses: urlscan = modules_responses.pop('urlscan') @@ -412,7 +420,9 @@ def modules(tree_uuid: str): else: # unable to run the 
query, probably an invalid key pass - return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, pi=pi_short_result, urlscan=urlscan_to_display) + return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, + pi=pi_short_result, urlscan=urlscan_to_display, + phishtank=phishtank_short_result) @app.route('/tree//redirects', methods=['GET']) diff --git a/website/web/templates/modules.html b/website/web/templates/modules.html index 6fc1bf6..f690f22 100644 --- a/website/web/templates/modules.html +++ b/website/web/templates/modules.html @@ -18,6 +18,20 @@ {% endif%} +{% if phishtank %} +
+
+

Phishtank

+
+

A scan was triggered for the url(s) below:

+
    + {% for url, permaurl in phishtank.items() %} +
  • {{ shorten_string(url, 150) }}: click to view it on phishtank.

    +
  • + {% endfor %} +
+
+{% endif%} {% if vt %}

Virus Total