mirror of https://github.com/CIRCL/lookyloo
new: Phishtank lookup.
parent
9c552f1032
commit
cefb3fddbf
|
@ -36,6 +36,11 @@
|
|||
"allow_auto_trigger": false,
|
||||
"force_visibility": false
|
||||
},
|
||||
"Phishtank": {
|
||||
"enabled": false,
|
||||
"url": null,
|
||||
"allow_auto_trigger": true
|
||||
},
|
||||
"_notes": {
|
||||
"apikey": "null disables the module. Pass a string otherwise.",
|
||||
"autosubmit": "Automatically submits the URL to the 3rd party service.",
|
||||
|
@ -45,6 +50,7 @@
|
|||
"SaneJS": "Module to query SaneJS: https://github.com/Lookyloo/sanejs",
|
||||
"MISP": "Module to query MISP: https://www.misp-project.org/",
|
||||
"UniversalWhois": "Module to query a local instance of uWhoisd: https://github.com/Lookyloo/uwhoisd",
|
||||
"UrlScan": "Module to query urlscan.io"
|
||||
"UrlScan": "Module to query urlscan.io",
|
||||
"Phishtank": "Module to query Phishtank Lookup (https://github.com/Lookyloo/phishtank-lookup). URL set to none means querying the public instance."
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ from .helpers import (CaptureStatus, get_captures_dir, get_config,
|
|||
uniq_domains)
|
||||
from .indexing import Indexing
|
||||
from .modules import (MISP, PhishingInitiative, SaneJavaScript, UniversalWhois,
|
||||
UrlScan, VirusTotal)
|
||||
UrlScan, VirusTotal, Phishtank)
|
||||
|
||||
|
||||
class Lookyloo():
|
||||
|
@ -85,6 +85,10 @@ class Lookyloo():
|
|||
if not self.urlscan.available:
|
||||
self.logger.warning('Unable to setup the UrlScan module')
|
||||
|
||||
self.phishtank = Phishtank(get_config('modules', 'Phishtank'))
|
||||
if not self.phishtank.available:
|
||||
self.logger.warning('Unable to setup the Phishtank module')
|
||||
|
||||
self.context = Context(self.sanejs)
|
||||
self._captures_index: Dict[str, CaptureCache] = {}
|
||||
|
||||
|
@ -473,6 +477,7 @@ class Lookyloo():
|
|||
self.get_info(capture_uuid),
|
||||
visibility='unlisted' if (capture_cache and capture_cache.no_index) else 'public',
|
||||
force=force, auto_trigger=auto_trigger)
|
||||
to_return['Phishtank'] = self.phishtank.capture_default_trigger(ct, auto_trigger=auto_trigger)
|
||||
return to_return
|
||||
|
||||
def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
|
||||
|
@ -497,6 +502,13 @@ class Lookyloo():
|
|||
to_return['pi'][redirect] = self.pi.get_url_lookup(redirect)
|
||||
else:
|
||||
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
|
||||
if self.phishtank.available:
|
||||
to_return['phishtank'] = {}
|
||||
if ct.redirects:
|
||||
for redirect in ct.redirects:
|
||||
to_return['phishtank'][redirect] = self.phishtank.get_url_lookup(redirect)
|
||||
else:
|
||||
to_return['phishtank'][ct.root_hartree.har.root_url] = self.phishtank.get_url_lookup(ct.root_hartree.har.root_url)
|
||||
if self.urlscan.available:
|
||||
info = self.get_info(capture_uuid)
|
||||
to_return['urlscan'] = {'submission': {}, 'result': {}}
|
||||
|
|
|
@ -7,3 +7,4 @@ from .sanejs import SaneJavaScript # noqa
|
|||
from .urlscan import UrlScan # noqa
|
||||
from .uwhois import UniversalWhois # noqa
|
||||
from .vt import VirusTotal # noqa
|
||||
from .phishtank import Phishtank # noqa
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from har2tree import CrawledTree
|
||||
from pyphishtanklookup import PhishtankLookup
|
||||
|
||||
from ..exceptions import ConfigError
|
||||
from ..helpers import get_homedir
|
||||
|
||||
# Note: stop doing requests 48 hours after the capture was initially done.
|
||||
|
||||
|
||||
class Phishtank():
    """Lookyloo module that queries a Phishtank Lookup instance and caches hits on disk.

    Each looked-up URL gets its own cache directory (named after the MD5 of the
    URL) under ``<homedir>/phishtank``. Inside it, one JSON file per day (named
    with the ISO date) stores the response returned by the lookup service.
    """

    def __init__(self, config: Dict[str, Any]):
        """Configure the module from its entry in the modules configuration.

        :param config: Module settings. Keys read here: ``enabled``, ``url``
                       and ``allow_auto_trigger``.
        """
        # Set before the early return so the attribute exists even when the module is disabled.
        self.allow_auto_trigger = False

        if not config.get('enabled'):
            self.available = False
            return

        self.available = True
        if config.get('url'):
            self.client = PhishtankLookup(config['url'])
        else:
            # No URL configured: let the client pick its own default instance.
            self.client = PhishtankLookup()

        if config.get('allow_auto_trigger'):
            self.allow_auto_trigger = True

        self.storage_dir_pt = get_homedir() / 'phishtank'
        self.storage_dir_pt.mkdir(parents=True, exist_ok=True)

    def __get_cache_directory(self, url: str) -> Path:
        """Return the cache directory dedicated to ``url``."""
        # MD5 is only used to derive a filesystem-safe directory name, not for security.
        m = hashlib.md5()
        m.update(url.encode())
        return self.storage_dir_pt / m.hexdigest()

    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
        """Return the most recent cached response for ``url``.

        :param url: The URL to look for in the local cache.
        :return: The decoded JSON content of the newest cache file, or ``None``
                 when nothing is cached for this URL. No request is made to
                 the lookup service.
        """
        url_storage_dir = self.__get_cache_directory(url)
        if not url_storage_dir.exists():
            return None

        # Cache files are named after ISO dates, so the greatest path is the newest entry.
        latest_entry = max(url_storage_dir.glob('*'), default=None)
        if latest_entry is None:
            return None

        with latest_entry.open() as f:
            return json.load(f)

    def capture_default_trigger(self, crawled_tree: 'CrawledTree', /, *, auto_trigger: bool=False) -> Dict:
        '''Run the module on all the nodes up to the final redirect

        :param crawled_tree: The capture to process.
        :param auto_trigger: Set when the call was not explicitly requested;
                             it is refused unless the module allows auto triggering.
        :return: A dict with either an ``error`` or a ``success`` key.
        '''
        if not self.available:
            return {'error': 'Module not available'}
        if auto_trigger and not self.allow_auto_trigger:
            return {'error': 'Auto trigger not allowed on module'}

        # Quit if the capture is more than 70h old, the data in phishtank expire around that time.
        if crawled_tree.start_time <= datetime.now(timezone.utc) - timedelta(hours=70):
            return {'error': 'Capture to old, the response will be irrelevant.'}

        if crawled_tree.redirects:
            for redirect in crawled_tree.redirects:
                self.url_lookup(redirect)
        else:
            # No redirect recorded: fall back to the URL the capture started from.
            self.url_lookup(crawled_tree.root_hartree.har.root_url)
        return {'success': 'Module triggered'}

    def url_lookup(self, url: str) -> None:
        '''Lookup an URL on Phishtank lookup
        Note: It will trigger a request to phishtank every time *until* there is a hit (it's cheap), then once a day.

        :param url: The URL to look up.
        :raises ConfigError: If the module is not available.
        '''
        if not self.available:
            # The module is only unavailable when it is disabled in the config.
            raise ConfigError('Phishtank not available, probably not enabled.')

        url_storage_dir = self.__get_cache_directory(url)
        url_storage_dir.mkdir(parents=True, exist_ok=True)
        pt_file = url_storage_dir / date.today().isoformat()

        # A file for today means we already got a hit today: do not query again.
        if pt_file.exists():
            return

        url_information = self.client.get_url_entry(url)
        # Only hits are written, so a miss is re-queried on the next call.
        if url_information:
            with pt_file.open('w') as _f:
                json.dump(url_information, _f)
|
|
@ -857,6 +857,20 @@ six = ">=1.5.2"
|
|||
docs = ["sphinx", "sphinx-rtd-theme"]
|
||||
test = ["flaky", "pretend", "pytest (>=3.0.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "pyphishtanklookup"
|
||||
version = "1.0.1"
|
||||
description = "Python CLI and module for PhishtankLookup"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.8,<4.0"
|
||||
|
||||
[package.dependencies]
|
||||
requests = ">=2.26.0,<3.0.0"
|
||||
|
||||
[package.extras]
|
||||
docs = ["Sphinx (>=4.2,<5.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pypydispatcher"
|
||||
version = "2.1.2"
|
||||
|
@ -1303,7 +1317,7 @@ misp = ["python-magic", "pydeep"]
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "06884ac13bf1010128d260e6ae1c1b0643690685e21149bc667cc71abb342ad1"
|
||||
content-hash = "ebcc6948a4459a9c998c5ecefce83e003cd720c7c498467aa35f510d49815c23"
|
||||
|
||||
[metadata.files]
|
||||
aiohttp = [
|
||||
|
@ -1984,6 +1998,10 @@ pyopenssl = [
|
|||
{file = "pyOpenSSL-20.0.1-py2.py3-none-any.whl", hash = "sha256:818ae18e06922c066f777a33f1fca45786d85edfe71cd043de6379337a7f274b"},
|
||||
{file = "pyOpenSSL-20.0.1.tar.gz", hash = "sha256:4c231c759543ba02560fcd2480c48dcec4dae34c9da7d3747c508227e0624b51"},
|
||||
]
|
||||
pyphishtanklookup = [
|
||||
{file = "pyphishtanklookup-1.0.1-py3-none-any.whl", hash = "sha256:f000c088255b2d18b166c80b51457e13b1dea30538d1d1943344d70ddc711b41"},
|
||||
{file = "pyphishtanklookup-1.0.1.tar.gz", hash = "sha256:509e4134f9f8ef843987f9a32bed872cf856c15d838d1be434485dc063ee0dc6"},
|
||||
]
|
||||
pypydispatcher = [
|
||||
{file = "PyPyDispatcher-2.1.2.tar.gz", hash = "sha256:b6bec5dfcff9d2535bca2b23c80eae367b1ac250a645106948d315fcfa9130f2"},
|
||||
]
|
||||
|
|
|
@ -63,6 +63,7 @@ Flask-Login = "^0.5.0"
|
|||
flask-restx = "^0.5.1"
|
||||
hiredis = "^2.0.0"
|
||||
rich = "^10.9.0"
|
||||
pyphishtanklookup = "^1.0.1"
|
||||
|
||||
[tool.poetry.extras]
|
||||
misp = ['python-magic', 'pydeep']
|
||||
|
|
|
@ -395,6 +395,14 @@ def modules(tree_uuid: str):
|
|||
continue
|
||||
pi_short_result[url] = full_report['results'][0]['tag_label']
|
||||
|
||||
phishtank_short_result: Dict[str, str] = {}
|
||||
if 'phishtank' in modules_responses:
|
||||
pt = modules_responses.pop('phishtank')
|
||||
for url, full_report in pt.items():
|
||||
if not full_report:
|
||||
continue
|
||||
phishtank_short_result[url] = full_report['phish_detail_url']
|
||||
|
||||
urlscan_to_display: Dict = {}
|
||||
if 'urlscan' in modules_responses:
|
||||
urlscan = modules_responses.pop('urlscan')
|
||||
|
@ -412,7 +420,9 @@ def modules(tree_uuid: str):
|
|||
else:
|
||||
# unable to run the query, probably an invalid key
|
||||
pass
|
||||
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, pi=pi_short_result, urlscan=urlscan_to_display)
|
||||
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result,
|
||||
pi=pi_short_result, urlscan=urlscan_to_display,
|
||||
phishtank=phishtank_short_result)
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
|
||||
|
|
|
@ -18,6 +18,20 @@
|
|||
</div>
|
||||
</center>
|
||||
{% endif%}
|
||||
{% if phishtank %}
{# Phishtank section: `phishtank` maps each URL to the link rendered next to it. #}
<hr>
<center>
  <h1 class="display-4">Phishtank</h1>
  <div>
    <p>A scan was triggered for the url(s) below: </p>
    <ul>
      {% for url, permaurl in phishtank.items() %}
      <li>{{ shorten_string(url, 150) }}: <a href="{{ permaurl }}">click to view it</a> on phishtank.</li>
      {% endfor %}
    </ul>
  </div>
</center>
{% endif%}
|
||||
{% if vt %}
|
||||
<hr>
|
||||
<center><h1 class="display-4">Virus Total</h1></center>
|
||||
|
|
Loading…
Reference in New Issue