new: Hashlookup integration

pull/298/head
Raphaël Vinot 2021-11-30 14:59:48 +01:00
parent a55fb5380a
commit 8f4040b806
10 changed files with 233 additions and 29 deletions

View File

@ -38,7 +38,12 @@
},
"Phishtank": {
"enabled": false,
"url": null,
"url": "https://phishtankapi.circl.lu/",
"allow_auto_trigger": true
},
"Hashlookup": {
"enabled": false,
"url": "https://hashlookup.circl.lu/",
"allow_auto_trigger": true
},
"_notes": {
@ -51,6 +56,7 @@
"MISP": "Module to query MISP: https://www.misp-project.org/",
"UniversalWhois": "Module to query a local instance of uWhoisd: https://github.com/Lookyloo/uwhoisd",
"UrlScan": "Module to query urlscan.io",
"Phishtank": "Module to query Phishtank Lookup (https://github.com/Lookyloo/phishtank-lookup). URL set to none means querying the public instance."
"Phishtank": "Module to query Phishtank Lookup (https://github.com/Lookyloo/phishtank-lookup). URL set to none means querying the public instance.",
"Hashlookup": "Module to query Hashlookup (https://github.com/adulau/hashlookup-server). URL set to none means querying the public instance."
}
}

View File

@ -33,7 +33,7 @@ from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
get_resources_hashes, get_splash_url, get_taxonomies, uniq_domains)
from .indexing import Indexing
from .modules import (MISP, PhishingInitiative, UniversalWhois,
UrlScan, VirusTotal, Phishtank)
UrlScan, VirusTotal, Phishtank, Hashlookup)
class Lookyloo():
@ -78,6 +78,10 @@ class Lookyloo():
if not self.phishtank.available:
self.logger.warning('Unable to setup the Phishtank module')
self.hashlookup = Hashlookup(get_config('modules', 'Hashlookup'))
if not self.hashlookup.available:
self.logger.warning('Unable to setup the Hashlookup module')
self.logger.info('Initializing context...')
self.context = Context()
self.logger.info('Context initialized.')
@ -222,6 +226,7 @@ class Lookyloo():
return {'error': f'UUID {capture_uuid} is either unknown or the tree is not ready yet.'}
self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
self.hashlookup.capture_default_trigger(ct, auto_trigger=auto_trigger)
to_return: Dict[str, Dict] = {'PhishingInitiative': {}, 'VirusTotal': {}, 'UrlScan': {}}
capture_cache = self.capture_cache(capture_uuid)
@ -719,8 +724,44 @@ class Lookyloo():
to_return[event_id].update(values)
return to_return
def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, hashes_only: bool=False, urls_only: bool=False):
    """Build (on demand) the hashes of all the resources of the tree, using the algorithm provided by the user.

    If you just want the hashes in SHA512, use the get_hashes method instead: it gives you
    the hashes that were built along with the tree. This method computes the hashes every
    time it is called, so it is slower.

    :param tree_uuid: UUID of the capture.
    :param algorithm: Name of the hash algorithm, passed as-is to build_all_hashes.
    :param hashes_only: Return only the list of hashes. Takes precedence over urls_only.
    :param urls_only: Return a dictionary mapping each hash to the names of the nodes it was found in.
    :return: The mapping built by build_all_hashes (hash -> nodes), unless one of the flags above is set.
    """
    ct = self.get_crawled_tree(tree_uuid)
    hashes = ct.root_hartree.build_all_hashes(algorithm)
    if hashes_only:
        return list(hashes)
    if urls_only:
        return {h: [node.name for node in nodes] for h, nodes in hashes.items()}
    return hashes
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Dict[str, Dict]:
    """Merge the Hashlookup hits of a capture with the nodes of the tree they were found in.

    If no lookup result is stored for the capture yet, a lookup is triggered first.

    :param tree_uuid: UUID of the capture.
    :return: A dictionary keyed by SHA1. Each value has two keys: 'nodes' (the nodes of the
             tree with that hash) and 'hashlookup' (the entry returned by Hashlookup).
             The dictionary is empty when there is no hit.
    :raises LookylooException: if the Hashlookup module is not enabled.
    """
    if not self.hashlookup.available:
        raise LookylooException('Hashlookup module not enabled.')
    hashes_tree = self.get_hashes_with_context(tree_uuid, algorithm='sha1')

    hashlookup_file = self._captures_index[tree_uuid].capture_dir / 'hashlookup.json'
    if not hashlookup_file.exists():
        # Nothing stored for this capture yet: run the lookup now.
        ct = self.get_crawled_tree(tree_uuid)
        self.hashlookup.capture_default_trigger(ct, auto_trigger=False)

    if not hashlookup_file.exists():
        # no hits on hashlookup: return an empty mapping (not None) so callers
        # can iterate over the result unconditionally.
        return {}

    with hashlookup_file.open() as f:
        hashlookup_entries = json.load(f)

    return {sha1: {'nodes': hashes_tree[sha1], 'hashlookup': entry}
            for sha1, entry in hashlookup_entries.items()}
def get_hashes(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]:
"""Return hashes of resources.
"""Return hashes (sha512) of resources.
Only tree_uuid: All the hashes
tree_uuid and hostnode_uuid: hashes of all the resources in that hostnode (including embedded ressources)
tree_uuid, hostnode_uuid, and urlnode_uuid: hash of the URL node body, and embedded resources

View File

@ -8,3 +8,4 @@ from .urlscan import UrlScan # noqa
from .uwhois import UniversalWhois # noqa
from .vt import VirusTotal # noqa
from .phishtank import Phishtank # noqa
from .hashlookup import HashlookupModule as Hashlookup # noqa

View File

@ -0,0 +1,64 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
from typing import Any, Dict, List
from har2tree import CrawledTree
from pyhashlookup import Hashlookup
from ..default import ConfigError
class HashlookupModule:
    '''This module is a bit different as it will trigger a lookup of all the hashes
    and store the response in the capture directory'''

    def __init__(self, config: Dict[str, Any]):
        '''Set up the module from its configuration.

        :param config: Module configuration. The keys read are 'enabled', 'url'
                       (optional) and 'allow_auto_trigger'.
        '''
        # Always defined, so the instance is in a consistent state even when
        # the module is disabled.
        self.allow_auto_trigger = False
        if not config.get('enabled'):
            self.available = False
            return

        self.available = True
        if config.get('url'):
            self.client = Hashlookup(config['url'])
        else:
            # No URL configured: let the client use its default.
            self.client = Hashlookup()
        self.allow_auto_trigger = bool(config.get('allow_auto_trigger'))

    def capture_default_trigger(self, crawled_tree: 'CrawledTree', /, *, auto_trigger: bool=False) -> Dict[str, str]:
        '''Lookup the SHA1 of every hash built for the tree and store the hits.

        The hits are written to 'hashlookup.json', in the directory of the HAR file.
        If that file already exists, no new lookup is made. Nothing is written when
        there is no hit, so the lookup runs again on the next call.

        :param crawled_tree: The tree of the capture.
        :param auto_trigger: Set to True when the call is not an explicit user request.
        :return: A dictionary with either an 'error' or a 'success' key.
        '''
        if not self.available:
            return {'error': 'Module not available'}
        if auto_trigger and not self.allow_auto_trigger:
            return {'error': 'Auto trigger not allowed on module'}

        store_file = crawled_tree.root_hartree.har.path.parent / 'hashlookup.json'
        if store_file.exists():
            # Hits already stored for this capture.
            return {'success': 'Module triggered'}

        hashes = crawled_tree.root_hartree.build_all_hashes('sha1')

        hits_hashlookup = self.hashes_lookup(list(hashes))
        if hits_hashlookup:
            # we got at least one hit, saving
            with store_file.open('w') as f:
                json.dump(hits_hashlookup, f, indent=2)

        return {'success': 'Module triggered'}

    def hashes_lookup(self, hashes: List[str]) -> Dict[str, Dict[str, str]]:
        '''Lookup a list of hashes against Hashlookup

        :param hashes: SHA1 hashes to look up.
        :return: The entries that contain a 'SHA-1' key, keyed by that value in lowercase.
        :raises ConfigError: if the module is not available.
        '''
        if not self.available:
            raise ConfigError('Hashlookup not available, probably not enabled.')
        to_return: Dict[str, Dict[str, str]] = {}
        if not hashes:
            # Nothing to look up, no need to send a request.
            return to_return
        for entry in self.client.sha1_bulk_lookup(hashes):
            # Entries without a 'SHA-1' key are not hits.
            if 'SHA-1' in entry:
                to_return[entry['SHA-1'].lower()] = entry
        return to_return

View File

@ -118,6 +118,7 @@ class Phishtank():
urls = self.client.get_urls_by_ip(ip)
if not urls:
ip_storage_dir.unlink()
return
to_dump = {'ip': ip, 'urls': urls}
with pt_file.open('w') as _f:
@ -140,6 +141,9 @@ class Phishtank():
return
url_information = self.client.get_url_entry(url)
if url_information:
with pt_file.open('w') as _f:
json.dump(url_information, _f)
if not url_information:
url_storage_dir.unlink()
return
with pt_file.open('w') as _f:
json.dump(url_information, _f)

55
poetry.lock generated
View File

@ -396,7 +396,7 @@ hyperframe = ">=5.2.0,<6"
[[package]]
name = "har2tree"
version = "1.9.1"
version = "1.9.2"
description = "HTTP Archive (HAR) to ETE Toolkit generator"
category = "main"
optional = false
@ -408,13 +408,13 @@ cchardet = ">=2.1.7,<3.0.0"
ete3 = ">=3.1.2,<4.0.0"
filetype = ">=1.0.8,<2.0.0"
lxml = ">=4.6.3,<5.0.0"
numpy = ">=1.21.2,<2.0.0"
numpy = ">=1.21.4,<2.0.0"
publicsuffix2 = ">=2.20191221,<3.0"
six = ">=1.16.0,<2.0.0"
w3lib = ">=1.22.0,<2.0.0"
[package.extras]
docs = ["Sphinx (>=4.2.0,<5.0.0)"]
docs = ["Sphinx (>=4.3.0,<5.0.0)"]
[[package]]
name = "hiredis"
@ -472,7 +472,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"]
[[package]]
name = "ipython"
version = "7.29.0"
version = "7.30.0"
description = "IPython: Productive Interactive Computing"
category = "dev"
optional = false
@ -731,7 +731,7 @@ python-versions = "*"
[[package]]
name = "prompt-toolkit"
version = "3.0.22"
version = "3.0.23"
description = "Library for building powerful interactive command lines in Python"
category = "dev"
optional = false
@ -837,6 +837,21 @@ category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "pyhashlookup"
version = "1.1"
description = "Python CLI and module for CIRCL hash lookup"
category = "main"
optional = false
python-versions = ">=3.8,<4.0"
[package.dependencies]
dnspython = ">=2.1.0,<3.0.0"
requests = ">=2.26.0,<3.0.0"
[package.extras]
docs = ["Sphinx (>=4.3.1,<5.0.0)"]
[[package]]
name = "pylookyloo"
version = "1.9.0"
@ -1016,7 +1031,7 @@ use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "rich"
version = "10.14.0"
version = "10.15.1"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
category = "main"
optional = false
@ -1215,7 +1230,7 @@ types-MarkupSafe = "*"
[[package]]
name = "types-markupsafe"
version = "1.1.9"
version = "1.1.10"
description = "Typing stubs for MarkupSafe"
category = "dev"
optional = false
@ -1357,7 +1372,7 @@ misp = ["python-magic", "pydeep"]
[metadata]
lock-version = "1.1"
python-versions = ">=3.8,<3.11"
content-hash = "9423f914579690635fab7c71e0f299cf17917d5277514a89cb3474d81ad7a016"
content-hash = "dc3eee3a35264d9d47d1e91b97a518f5be514566f923d817d30bacb18384fc0a"
[metadata.files]
aiohttp = [
@ -1727,8 +1742,8 @@ h2 = [
{file = "h2-3.2.0.tar.gz", hash = "sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14"},
]
har2tree = [
{file = "har2tree-1.9.1-py3-none-any.whl", hash = "sha256:b3ff9307d2b08c755258211aa27a7ed114cca257b49c2c73f15300bd059b22f5"},
{file = "har2tree-1.9.1.tar.gz", hash = "sha256:fa81639762c7ebb505b6755b48134e96bfe9d7434a8ab0ce595c928785434d4c"},
{file = "har2tree-1.9.2-py3-none-any.whl", hash = "sha256:e94ec07e85eedf2d63a2fee8c9c26e7cc5c73ebe96eb243bea3e238b6240cb37"},
{file = "har2tree-1.9.2.tar.gz", hash = "sha256:7a9854b9d6052d53bc5a76aab6687d4fe797b82f1150c2c4a4c96d0009ef2485"},
]
hiredis = [
{file = "hiredis-2.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b4c8b0bc5841e578d5fb32a16e0c305359b987b850a06964bd5a62739d688048"},
@ -1794,8 +1809,8 @@ incremental = [
{file = "incremental-21.3.0.tar.gz", hash = "sha256:02f5de5aff48f6b9f665d99d48bfc7ec03b6e3943210de7cfc88856d755d6f57"},
]
ipython = [
{file = "ipython-7.29.0-py3-none-any.whl", hash = "sha256:a658beaf856ce46bc453366d5dc6b2ddc6c481efd3540cb28aa3943819caac9f"},
{file = "ipython-7.29.0.tar.gz", hash = "sha256:4f69d7423a5a1972f6347ff233e38bbf4df6a150ef20fbb00c635442ac3060aa"},
{file = "ipython-7.30.0-py3-none-any.whl", hash = "sha256:c8f3e07aefb9cf9e067f39686f035ce09b27a1ee602116a3030b91b6fc138ee4"},
{file = "ipython-7.30.0.tar.gz", hash = "sha256:d41f8e80b99690122400f9b2069b12f670246a1b4cc5d332bd6c4e2500e6d6fb"},
]
itemadapter = [
{file = "itemadapter-0.4.0-py3-none-any.whl", hash = "sha256:695809a4e2f42174f0392dd66c2ceb2b2454d3ebbf65a930e5c85910d8d88d8f"},
@ -2154,8 +2169,8 @@ priority = [
{file = "priority-1.3.0.tar.gz", hash = "sha256:6bc1961a6d7fcacbfc337769f1a382c8e746566aaa365e78047abe9f66b2ffbe"},
]
prompt-toolkit = [
{file = "prompt_toolkit-3.0.22-py3-none-any.whl", hash = "sha256:48d85cdca8b6c4f16480c7ce03fd193666b62b0a21667ca56b4bb5ad679d1170"},
{file = "prompt_toolkit-3.0.22.tar.gz", hash = "sha256:449f333dd120bd01f5d296a8ce1452114ba3a71fae7288d2f0ae2c918764fa72"},
{file = "prompt_toolkit-3.0.23-py3-none-any.whl", hash = "sha256:5f29d62cb7a0ecacfa3d8ceea05a63cd22500543472d64298fc06ddda906b25d"},
{file = "prompt_toolkit-3.0.23.tar.gz", hash = "sha256:7053aba00895473cb357819358ef33f11aa97e4ac83d38efb123e5649ceeecaf"},
]
protego = [
{file = "Protego-0.1.16.tar.gz", hash = "sha256:a682771bc7b51b2ff41466460896c1a5a653f9a1e71639ef365a72e66d8734b4"},
@ -2221,6 +2236,10 @@ pygments = [
{file = "Pygments-2.10.0-py3-none-any.whl", hash = "sha256:b8e67fe6af78f492b3c4b3e2970c0624cbf08beb1e493b2c99b9fa1b67a20380"},
{file = "Pygments-2.10.0.tar.gz", hash = "sha256:f398865f7eb6874156579fdf36bc840a03cab64d1cde9e93d68f46a425ec52c6"},
]
pyhashlookup = [
{file = "pyhashlookup-1.1-py3-none-any.whl", hash = "sha256:442cd03b9676e02c5fccb9968128debd73b7ad080ae46f9fa3694b0fad3ae0c5"},
{file = "pyhashlookup-1.1.tar.gz", hash = "sha256:f43da968e762331ca25e52d0d031ed577183d31e228ec504ca9712670114f4e5"},
]
pylookyloo = [
{file = "pylookyloo-1.9.0-py3-none-any.whl", hash = "sha256:d0ad7de714e6f8b1b024c1b296f0ca8c7ad83a1b8d6dd78c8655a3561d52089d"},
{file = "pylookyloo-1.9.0.tar.gz", hash = "sha256:2123fa321058b7aa2d7269911d46fe79df00a00be25c96372ed9502a2749f309"},
@ -2296,8 +2315,8 @@ requests = [
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
]
rich = [
{file = "rich-10.14.0-py3-none-any.whl", hash = "sha256:ab9cbfd7a3802d8c6f0fa91e974630e2a69447972dcbb9dfe9b01016dd95e38e"},
{file = "rich-10.14.0.tar.gz", hash = "sha256:8bfe4546d56b4131298d3a9e571a0742de342f1593770bd0d4707299f772a0af"},
{file = "rich-10.15.1-py3-none-any.whl", hash = "sha256:a59fb2721c52c5061ac65f318c0afb709e098b1ab6ce5813ea38982654c4b6ee"},
{file = "rich-10.15.1.tar.gz", hash = "sha256:93d0ea3c35ecfd8703dbe52b76885e224ad8d68c7766c921c726b14b22a57b7d"},
]
scrapy = [
{file = "Scrapy-2.5.1-py2.py3-none-any.whl", hash = "sha256:1a9a36970004950ee3c519a14c4db945f9d9a63fecb3d593dddcda477331dde9"},
@ -2362,8 +2381,8 @@ types-jinja2 = [
{file = "types_Jinja2-2.11.9-py3-none-any.whl", hash = "sha256:60a1e21e8296979db32f9374d8a239af4cb541ff66447bb915d8ad398f9c63b2"},
]
types-markupsafe = [
{file = "types-MarkupSafe-1.1.9.tar.gz", hash = "sha256:a9212736c37763549a96d71e358bc0fc16723798468b40fe3615960e9e2a8b1b"},
{file = "types_MarkupSafe-1.1.9-py3-none-any.whl", hash = "sha256:91777713ffea6b70ce6ef85abaf544bfba1671d47a5985fc88a75136541b99c6"},
{file = "types-MarkupSafe-1.1.10.tar.gz", hash = "sha256:85b3a872683d02aea3a5ac2a8ef590193c344092032f58457287fbf8e06711b1"},
{file = "types_MarkupSafe-1.1.10-py3-none-any.whl", hash = "sha256:ca2bee0f4faafc45250602567ef38d533e877d2ddca13003b319c551ff5b3cc5"},
]
types-pkg-resources = [
{file = "types-pkg_resources-0.1.3.tar.gz", hash = "sha256:834a9b8d3dbea343562fd99d5d3359a726f6bf9d3733bccd2b4f3096fbab9dae"},

View File

@ -48,7 +48,7 @@ vt-py = "^0.8.0"
pyeupi = "^1.1"
scrapysplashwrapper = "^1.9.3"
pysanejs = "^2.0"
har2tree = "^1.9.1"
har2tree = "^1.9.2"
pylookyloo = "^1.9"
dnspython = "^2.1.0"
pytaxonomies = "^1.4.1"
@ -64,6 +64,7 @@ rich = "^10.14.0"
pyphishtanklookup = "^1.0.1"
chardet = "^4.0.0"
Flask-Cors = "^3.0.10"
pyhashlookup = "^1.1"
[tool.poetry.extras]
misp = ['python-magic', 'pydeep']
@ -71,9 +72,9 @@ misp = ['python-magic', 'pydeep']
[tool.poetry.dev-dependencies]
mypy = "^0.910"
ipython = "^7.29.0"
types-redis = "^4.0"
types-requests = "^2.26"
types-Flask = "^1.1.5"
types-redis = "^4.0.1"
types-requests = "^2.26.1"
types-Flask = "^1.1.6"
types-pkg-resources = "^0.1.2"
[build-system]

View File

@ -513,6 +513,12 @@ def urls_rendered_page(tree_uuid: str):
return render_template('urls_rendered.html', base_tree_uuid=tree_uuid, urls=urls)
@app.route('/tree/<string:tree_uuid>/hashlookup', methods=['GET'])
def hashlookup(tree_uuid: str):
    """Render the Hashlookup hits of a capture, merged with the nodes of its tree."""
    # Guard against an empty/None result: the template iterates over
    # merged.items(), so it must always be handed a mapping.
    merged = lookyloo.merge_hashlookup_tree(tree_uuid) or {}
    return render_template('hashlookup.html', base_tree_uuid=tree_uuid, merged=merged)
@app.route('/bulk_captures/<string:base_tree_uuid>', methods=['POST'])
def bulk_captures(base_tree_uuid: str):
if flask_login.current_user.is_authenticated:

View File

@ -0,0 +1,26 @@
{# Hashlookup hits of a capture: one pair of sections per SHA1 with a hit.
   `merged` maps a SHA1 to {'nodes': [...], 'hashlookup': {...}}; it can be empty or undefined when there is no hit. #}
<div>
{% if merged %}
  {% for sha1, entries in merged.items() %}
  <dl class="row">
    <dt class="col-sm-2">URLs in tree</dt>
    <dd class="col-sm-10">
      {% for node in entries['nodes'] %}
      {{ node.name }} <br>
      {% endfor %}
    </dd>
  </dl>
  <dl class="row">
    <dt class="col-sm-2">Entries on hashlookup</dt>
    <dd class="col-sm-7">
      {% for k, v in entries['hashlookup'].items() %}
      <b>{{k}}</b>:
      {% if k == "SHA-1" %}
      {# NOTE(review): this link always targets the public instance, even if the module is configured with another URL. #}
      <a href="https://hashlookup.circl.lu/lookup/sha1/{{ v }}">{{ v }}</a>
      {% else %}
      {{ v }}
      {% endif %}
      <br>
      {% endfor %}
    </dd>
  </dl>
  {% endfor %}
{% else %}
  <p>No hits in Hashlookup for this capture.</p>
{% endif %}
</div>

View File

@ -34,6 +34,13 @@
integrity="{{get_sri('static', 'tree.js')}}"
crossorigin="anonymous"></script>
<script>
$('#hashlookupModal').on('show.bs.modal', function(e) {
var button = $(e.relatedTarget);
var modal = $(this);
modal.find('.modal-body').load(button.data("remote"));
});
</script>
<script>
$('#modulesModal').on('show.bs.modal', function(e) {
var button = $(e.relatedTarget);
@ -203,6 +210,12 @@
<a href="#modulesModal" data-remote="{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=False) }}"
data-toggle="modal" data-target="#modulesModal" role="button">Third Party Reports</a>
</li>
{# Menu entry opening the Hashlookup modal; data-remote carries the URL of the 'hashlookup' view for this tree. #}
{# NOTE(review): only the link is restricted to authenticated users here; whether the view itself requires a login is not visible from this template, confirm. #}
{% if current_user.is_authenticated %}
<li>
<a href="#hashlookupModal" data-remote="{{ url_for('hashlookup', tree_uuid=tree_uuid) }}"
data-toggle="modal" data-target="#hashlookupModal" role="button">Hashlookup hits</a>
</li>
{% endif %}
</ul>
</div>
<hr class="vertical">
@ -512,6 +525,29 @@
</div>
</div>
{# Modal showing the Hashlookup hits; the placeholder in .modal-body is shown until the results are loaded. #}
<div class="modal fade" id="hashlookupModal" tabindex="-1" role="dialog" aria-labelledby="hashlookupModalLabel">
  <div class="modal-dialog modal-xl" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h4 class="modal-title" id="hashlookupModalLabel">
          Hits in Hashlookup
        </h4>
        <br>
        <button type="button" class="close" data-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">×</span>
        </button>
      </div>
      <br>
      <div class="modal-body">
        ... loading results from hashlookup ...
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
      </div>
    </div>
  </div>
</div>
{% if enable_context_by_users %}
<div class="modal fade" id="categoriesModal" tabindex="-1" role="dialog">
<div class="modal-dialog modal-xl" role="document">