new: Phishing Initiative module

pull/79/head
Raphaël Vinot 2020-06-09 15:06:35 +02:00
parent d2d6acab28
commit 285a0cf727
8 changed files with 134 additions and 5 deletions

View File

@ -3,6 +3,10 @@
"apikey": "KEY",
"autosubmit": false
},
"PhishingInitiative": {
"apikey": "KEY",
"autosubmit": false
},
"SaneJS": {
"enabled": true
}

View File

@ -26,7 +26,7 @@ from scrapysplashwrapper import crawl
from .exceptions import NoValidHarFile, MissingUUID
from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template
from .modules import VirusTotal, SaneJavaScript
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
class Lookyloo():
@ -47,6 +47,10 @@ class Lookyloo():
if 'modules' not in self.configs:
self.logger.info('No third party components available in the config directory')
else:
if 'PhishingInitiative' in self.configs['modules']:
self.pi = PhishingInitiative(self.configs['modules']['PhishingInitiative'])
if not self.pi.available:
self.logger.warning('Unable to setup the PhishingInitiative module')
if 'VirusTotal' in self.configs['modules']:
self.vt = VirusTotal(self.configs['modules']['VirusTotal'])
if not self.vt.available:
@ -113,6 +117,13 @@ class Lookyloo():
self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_dir}) is cached.')
return
if hasattr(self, 'pi') and self.pi.available:
if ct.redirects:
for redirect in ct.redirects:
self.pi.url_lookup(redirect, force)
else:
self.pi.url_lookup(ct.root_hartree.har.root_url, force)
if hasattr(self, 'vt') and self.vt.available:
if ct.redirects:
for redirect in ct.redirects:
@ -133,6 +144,13 @@ class Lookyloo():
to_return['vt'][redirect] = self.vt.get_url_lookup(redirect)
else:
to_return['vt'][ct.root_hartree.har.root_url] = self.vt.get_url_lookup(ct.root_hartree.har.root_url)
if hasattr(self, 'pi') and self.pi.available:
to_return['pi'] = {}
if ct.redirects:
for redirect in ct.redirects:
to_return['pi'][redirect] = self.pi.get_url_lookup(redirect)
else:
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
return to_return
def _set_capture_cache(self, capture_dir: Path, force: bool=False) -> None:

View File

@ -14,6 +14,7 @@ from .exceptions import ConfigError
import vt # type: ignore
from pysanejs import SaneJS
from pyeupi import PyEUPI
class SaneJavaScript():
@ -114,6 +115,79 @@ class SaneJavaScript():
return to_return
class PhishingInitiative():
    '''Wrapper around the Phishing Initiative API (via PyEUPI) with an on-disk cache.

    Lookup responses are stored as JSON files, one directory per URL and one
    file per day, below ``<homedir>/eupi``.
    '''

    # Number of times url_lookup polls the API while waiting for a freshly
    # submitted URL to get a tag.
    _LOOKUP_ATTEMPTS = 3

    def __init__(self, config: Dict[str, Any]):
        '''Set up the client from the module configuration.

        :param config: module settings; ``apikey`` is required, ``autosubmit``
                       is optional and defaults to False.

        When no usable API key is configured, ``available`` is set to False and
        nothing else is initialized.
        '''
        # A missing, empty or null key is equally unusable: do not pretend the
        # module is available in any of those cases.
        if not config.get('apikey'):
            self.available = False
            return

        self.available = True
        self.client = PyEUPI(config['apikey'])
        self.autosubmit = bool(config.get('autosubmit'))
        self.storage_dir_eupi = get_homedir() / 'eupi'
        self.storage_dir_eupi.mkdir(parents=True, exist_ok=True)

    def __get_cache_directory(self, url: str) -> Path:
        '''Return the directory holding the cached responses for this URL.'''
        # MD5 is only used to derive a fixed-length, filesystem-safe directory
        # name from the URL; it is not a security measure.
        m = hashlib.md5()
        m.update(url.encode())
        return self.storage_dir_eupi / m.hexdigest()

    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
        '''Return the most recent cached response for the URL.

        :return: the decoded JSON response, or None if nothing is cached.
        '''
        url_storage_dir = self.__get_cache_directory(url)
        if not url_storage_dir.exists():
            return None
        # Cache files are named after the ISO date of the lookup, so the
        # reverse lexicographic order puts the newest entry first.
        cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
        if not cached_entries:
            return None

        with cached_entries[0].open() as f:
            return json.load(f)

    def url_lookup(self, url: str, force: bool=False) -> None:
        '''Look up a URL on Phishing Initiative and cache the response.

        Note: force means 2 things:
            * (re)scan of the URL
            * re-fetch the object from Phishing Initiative even if we already did it today

        Note: the URL will only be sent for scan if autosubmit is set to true in the config

        :param url: the URL to look up.
        :param force: see above.
        :raises ConfigError: if the module is not available.
        '''
        if not self.available:
            raise ConfigError('PhishingInitiative not available, probably no API key')

        url_storage_dir = self.__get_cache_directory(url)
        url_storage_dir.mkdir(parents=True, exist_ok=True)
        pi_file = url_storage_dir / date.today().isoformat()

        scan_requested = False
        if self.autosubmit and force:
            self.client.post_submission(url, comment='Received on Lookyloo')
            scan_requested = True

        if not force and pi_file.exists():
            # Already fetched today, nothing to do.
            return

        for attempt in range(self._LOOKUP_ATTEMPTS):
            url_information = self.client.lookup(url)
            # NOTE(review): assumes the client returns a dict; a response
            # without 'results' (e.g. an error payload) is treated like an
            # empty result set instead of raising KeyError.
            results = url_information.get('results')
            if not results:
                # No results, that should not happen (?)
                break
            if results[0]['tag'] != -1:
                with pi_file.open('w') as _f:
                    json.dump(url_information, _f)
                break

            # tag == -1: the URL is not known to Phishing Initiative (yet).
            if not self.autosubmit:
                break
            if not scan_requested:
                self.client.post_submission(url, comment='Received on Lookyloo')
                scan_requested = True
            # Give the scan some time, but do not wait when no retry follows.
            if attempt < self._LOOKUP_ATTEMPTS - 1:
                time.sleep(1)
class VirusTotal():
def __init__(self, config: Dict[str, Any]):

17
poetry.lock generated
View File

@ -651,6 +651,17 @@ optional = false
python-versions = "*"
version = "2.0.5"
[[package]]
category = "main"
description = "Python API for the European Union anti-phishing initiative."
name = "pyeupi"
optional = false
python-versions = ">=3.6,<4.0"
version = "1.1"
[package.dependencies]
requests = ">=2.23.0,<3.0.0"
[[package]]
category = "dev"
description = "Pygments is a syntax highlighting package written in Python."
@ -1071,7 +1082,7 @@ test = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
content-hash = "b339ef339a6d6a067fe9d0652ee896aaa3a210dc1762a2172df882632fcdb7b3"
content-hash = "d42b2b2df27af13f97538c78e96e6bc9d32df61aa16db8c18ff6a67b924151ee"
python-versions = "^3.7"
[metadata.files]
@ -1480,6 +1491,10 @@ pydispatcher = [
{file = "PyDispatcher-2.0.5.tar.gz", hash = "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf"},
{file = "PyDispatcher-2.0.5.zip", hash = "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433"},
]
pyeupi = [
{file = "pyeupi-1.1-py3-none-any.whl", hash = "sha256:a0798a4a52601b0840339449a1bbf2aa2bc180d8f82a979022954e05fcb5bfba"},
{file = "pyeupi-1.1.tar.gz", hash = "sha256:2309c61ac2ef0eafabd6e9f32a0078069ffbba0e113ebc6b51cffc1869094472"},
]
pygments = [
{file = "Pygments-2.6.1-py3-none-any.whl", hash = "sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324"},
{file = "Pygments-2.6.1.tar.gz", hash = "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44"},

View File

@ -45,6 +45,7 @@ cloudscraper = "^1.2.20"
defang = "^0.5.3"
vt-py = "^0.5.2"
Flask-HTTPAuth = "^3.3.0"
pyeupi = "^1.0"
[tool.poetry.dev-dependencies]
mypy = "^0.761"

View File

@ -288,7 +288,15 @@ def modules(tree_uuid: str):
if result['category'] == 'malicious':
vt_short_result[url]['malicious'].append((vendor, result['result']))
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result)
pi_short_result: Dict[str, str] = {}
if 'pi' in modules_responses:
pi = modules_responses.pop('pi')
for url, full_report in pi.items():
if not full_report:
continue
pi_short_result[url] = full_report['results'][0]['tag_label']
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, pi=pi_short_result)
@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])

View File

@ -138,7 +138,6 @@
</ul>
</p>
{% endif %}
</li>
<li class="list-group-item">
@ -180,6 +179,8 @@
</ul>
</p>
{% endif %}
</li>
</ul>
</li>
{% endfor %}
</ul>

View File

@ -3,7 +3,7 @@
<center><h1 class="display-4">Virus Total</h1></center>
{% for url, entries in vt.items() %}
<div class="border-top my-3"></div>
<center><h3><small class="text-muted">URL</small><br>{{ url }}</h3>
<center><h3><small class="text-muted">URL</small><br>{{ url }}</h3></center>
{% if entries['malicious'] %}
<center>
<p class="lead">Detected as malicious by the following vendors</p>
@ -20,4 +20,12 @@
<h5 class="text-right"><a href="{{ entries['permaurl'] }}">Full report on VirusTotal</a></h5>
{% endfor %}
{% endif%}
{% if pi%}
<center><h1 class="display-4">Phishing Initiative</h1></center>
{% for url, tag in pi.items() %}
<center><h3><small class="text-muted">URL</small><br>{{ url }}</h3>
<div>This URL is tagged as <b>{{ tag }}</b> on Phishing Initiative</div>
</center>
{% endfor %}
{% endif%}
</div>