new: Phishtank lookup.

pull/263/head
Raphaël Vinot 2021-09-16 16:33:44 +02:00
parent 9c552f1032
commit cefb3fddbf
8 changed files with 157 additions and 5 deletions

View File

@ -36,6 +36,11 @@
"allow_auto_trigger": false,
"force_visibility": false
},
"Phishtank": {
"enabled": false,
"url": null,
"allow_auto_trigger": true
},
"_notes": {
"apikey": "null disables the module. Pass a string otherwise.",
"autosubmit": "Automatically submits the URL to the 3rd party service.",
@ -45,6 +50,7 @@
"SaneJS": "Module to query SaneJS: https://github.com/Lookyloo/sanejs",
"MISP": "Module to query MISP: https://www.misp-project.org/",
"UniversalWhois": "Module to query a local instance of uWhoisd: https://github.com/Lookyloo/uwhoisd",
"UrlScan": "Module to query urlscan.io"
"UrlScan": "Module to query urlscan.io",
"Phishtank": "Module to query Phishtank Lookup (https://github.com/Lookyloo/phishtank-lookup). URL set to null means querying the public instance."
}
}

View File

@ -40,7 +40,7 @@ from .helpers import (CaptureStatus, get_captures_dir, get_config,
uniq_domains)
from .indexing import Indexing
from .modules import (MISP, PhishingInitiative, SaneJavaScript, UniversalWhois,
UrlScan, VirusTotal)
UrlScan, VirusTotal, Phishtank)
class Lookyloo():
@ -85,6 +85,10 @@ class Lookyloo():
if not self.urlscan.available:
self.logger.warning('Unable to setup the UrlScan module')
self.phishtank = Phishtank(get_config('modules', 'Phishtank'))
if not self.phishtank.available:
self.logger.warning('Unable to setup the Phishtank module')
self.context = Context(self.sanejs)
self._captures_index: Dict[str, CaptureCache] = {}
@ -473,6 +477,7 @@ class Lookyloo():
self.get_info(capture_uuid),
visibility='unlisted' if (capture_cache and capture_cache.no_index) else 'public',
force=force, auto_trigger=auto_trigger)
to_return['Phishtank'] = self.phishtank.capture_default_trigger(ct, auto_trigger=auto_trigger)
return to_return
def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
@ -497,6 +502,13 @@ class Lookyloo():
to_return['pi'][redirect] = self.pi.get_url_lookup(redirect)
else:
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
if self.phishtank.available:
to_return['phishtank'] = {}
if ct.redirects:
for redirect in ct.redirects:
to_return['phishtank'][redirect] = self.phishtank.get_url_lookup(redirect)
else:
to_return['phishtank'][ct.root_hartree.har.root_url] = self.phishtank.get_url_lookup(ct.root_hartree.har.root_url)
if self.urlscan.available:
info = self.get_info(capture_uuid)
to_return['urlscan'] = {'submission': {}, 'result': {}}

View File

@ -6,4 +6,5 @@ from .pi import PhishingInitiative # noqa
from .sanejs import SaneJavaScript # noqa
from .urlscan import UrlScan # noqa
from .uwhois import UniversalWhois # noqa
from.vt import VirusTotal # noqa
from .vt import VirusTotal # noqa
from .phishtank import Phishtank # noqa

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import json
from datetime import date, datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Optional
from har2tree import CrawledTree
from pyphishtanklookup import PhishtankLookup
from ..exceptions import ConfigError
from ..helpers import get_homedir
# Note: stop doing requests 70 hours after the capture was initially done (see capture_default_trigger).
class Phishtank():
    '''Look URLs up on a Phishtank Lookup instance and cache the hits on disk.

    Hits are stored as JSON files under ``<homedir>/phishtank/<md5(url)>/<YYYY-MM-DD>``,
    so at most one hit per URL and per day is kept.

    Attributes set by ``__init__``:
      * ``available``: True when the module is enabled in the configuration.
      * ``allow_auto_trigger``: True when the module may run on automatic triggers.
      * ``client`` and ``storage_dir_pt``: only set when the module is enabled.
    '''

    def __init__(self, config: Dict[str, Any]):
        '''Initialize the module from its configuration block.

        :param config: module settings; the keys read are ``enabled``, ``url``
                       and ``allow_auto_trigger``.
        '''
        # Define the flags before any early return, so a disabled module can
        # still be probed without raising AttributeError.
        self.available = False
        self.allow_auto_trigger = False
        if not config.get('enabled'):
            return

        self.available = True
        if config.get('url'):
            self.client = PhishtankLookup(config['url'])
        else:
            # No URL configured: let the client use its own default endpoint.
            self.client = PhishtankLookup()

        if config.get('allow_auto_trigger'):
            self.allow_auto_trigger = True

        self.storage_dir_pt = get_homedir() / 'phishtank'
        self.storage_dir_pt.mkdir(parents=True, exist_ok=True)

    def __get_cache_directory(self, url: str) -> Path:
        '''Return the cache directory of a URL (named after the MD5 of the URL).'''
        # MD5 is only used to derive a filesystem-safe name, not for security.
        m = hashlib.md5()
        m.update(url.encode())
        return self.storage_dir_pt / m.hexdigest()

    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
        '''Return the most recent cached response for a URL.

        :param url: the URL to search in the cache.
        :return: the decoded JSON of the latest cached entry, or None if the
                 module is disabled or nothing is cached for that URL.
        '''
        if not self.available:
            # A disabled module has no storage directory, hence no cache.
            return None
        url_storage_dir = self.__get_cache_directory(url)
        if not url_storage_dir.exists():
            return None
        # File names are ISO dates, so the reverse lexical sort puts the newest first.
        cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
        if not cached_entries:
            return None

        with cached_entries[0].open() as f:
            return json.load(f)

    def capture_default_trigger(self, crawled_tree: 'CrawledTree', /, *, auto_trigger: bool=False) -> Dict:
        '''Run the module on all the nodes up to the final redirect

        :param crawled_tree: the capture to process.
        :param auto_trigger: True when the call is an automatic trigger, it is
                             then refused unless the configuration allows it.
        :return: a dictionary with either an ``error`` or a ``success`` key.
        '''
        if not self.available:
            return {'error': 'Module not available'}
        if auto_trigger and not self.allow_auto_trigger:
            return {'error': 'Auto trigger not allowed on module'}

        # Quit if the capture is more than 70h old, the data in phishtank expire around that time.
        if crawled_tree.start_time <= datetime.now(timezone.utc) - timedelta(hours=70):
            return {'error': 'Capture to old, the response will be irrelevant.'}

        if crawled_tree.redirects:
            for redirect in crawled_tree.redirects:
                self.url_lookup(redirect)
        else:
            # No redirect: only the URL the capture started from is looked up.
            self.url_lookup(crawled_tree.root_hartree.har.root_url)

        return {'success': 'Module triggered'}

    def url_lookup(self, url: str) -> None:
        '''Lookup an URL on Phishtank lookup
        Note: It will trigger a request to phishtank every time *until* there is a hit (it's cheap), then once a day.

        :param url: the URL to look up.
        :raises ConfigError: if the module is not enabled.
        '''
        if not self.available:
            raise ConfigError('Phishtank not available, the module is not enabled')

        url_storage_dir = self.__get_cache_directory(url)
        url_storage_dir.mkdir(parents=True, exist_ok=True)
        pt_file = url_storage_dir / date.today().isoformat()

        if pt_file.exists():
            # There was already a hit today, no need to query again.
            return

        url_information = self.client.get_url_entry(url)
        # Only hits are written to disk, so a miss will be retried on the next call.
        if url_information:
            with pt_file.open('w') as _f:
                json.dump(url_information, _f)

20
poetry.lock generated
View File

@ -857,6 +857,20 @@ six = ">=1.5.2"
docs = ["sphinx", "sphinx-rtd-theme"]
test = ["flaky", "pretend", "pytest (>=3.0.1)"]
[[package]]
name = "pyphishtanklookup"
version = "1.0.1"
description = "Python CLI and module for PhishtankLookup"
category = "main"
optional = false
python-versions = ">=3.8,<4.0"
[package.dependencies]
requests = ">=2.26.0,<3.0.0"
[package.extras]
docs = ["Sphinx (>=4.2,<5.0)"]
[[package]]
name = "pypydispatcher"
version = "2.1.2"
@ -1303,7 +1317,7 @@ misp = ["python-magic", "pydeep"]
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "06884ac13bf1010128d260e6ae1c1b0643690685e21149bc667cc71abb342ad1"
content-hash = "ebcc6948a4459a9c998c5ecefce83e003cd720c7c498467aa35f510d49815c23"
[metadata.files]
aiohttp = [
@ -1984,6 +1998,10 @@ pyopenssl = [
{file = "pyOpenSSL-20.0.1-py2.py3-none-any.whl", hash = "sha256:818ae18e06922c066f777a33f1fca45786d85edfe71cd043de6379337a7f274b"},
{file = "pyOpenSSL-20.0.1.tar.gz", hash = "sha256:4c231c759543ba02560fcd2480c48dcec4dae34c9da7d3747c508227e0624b51"},
]
pyphishtanklookup = [
{file = "pyphishtanklookup-1.0.1-py3-none-any.whl", hash = "sha256:f000c088255b2d18b166c80b51457e13b1dea30538d1d1943344d70ddc711b41"},
{file = "pyphishtanklookup-1.0.1.tar.gz", hash = "sha256:509e4134f9f8ef843987f9a32bed872cf856c15d838d1be434485dc063ee0dc6"},
]
pypydispatcher = [
{file = "PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2"},
]

View File

@ -63,6 +63,7 @@ Flask-Login = "^0.5.0"
flask-restx = "^0.5.1"
hiredis = "^2.0.0"
rich = "^10.9.0"
pyphishtanklookup = "^1.0.1"
[tool.poetry.extras]
misp = ['python-magic', 'pydeep']

View File

@ -395,6 +395,14 @@ def modules(tree_uuid: str):
continue
pi_short_result[url] = full_report['results'][0]['tag_label']
phishtank_short_result: Dict[str, str] = {}
if 'phishtank' in modules_responses:
pt = modules_responses.pop('phishtank')
for url, full_report in pt.items():
if not full_report:
continue
phishtank_short_result[url] = full_report['phish_detail_url']
urlscan_to_display: Dict = {}
if 'urlscan' in modules_responses:
urlscan = modules_responses.pop('urlscan')
@ -412,7 +420,9 @@ def modules(tree_uuid: str):
else:
# unable to run the query, probably an invalid key
pass
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, pi=pi_short_result, urlscan=urlscan_to_display)
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result,
pi=pi_short_result, urlscan=urlscan_to_display,
phishtank=phishtank_short_result)
@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])

View File

@ -18,6 +18,20 @@
</div>
</center>
{% endif%}
{% if phishtank %}
<hr>
<center>
<h1 class="display-4">Phishtank</h1>
<div>
<p>A scan was triggered for the url(s) below: </p>
<ul>
{% for url, permaurl in phishtank.items() %}
<li>{{ shorten_string(url, 150) }}: <a href="{{ permaurl }}">click to view it</a> on phishtank.
</li>
{% endfor %}
</ul>
</div>
</center>
{% endif%}
{% if vt %}
<hr>
<center><h1 class="display-4">Virus Total</h1></center>