chg: Improve module auto trigger

pull/205/head
Raphaël Vinot 2021-05-19 15:03:07 -07:00
parent 7b4f5bd6d2
commit 11f05626b5
5 changed files with 94 additions and 25 deletions

View File

@ -1,14 +1,17 @@
{ {
"VirusTotal": { "VirusTotal": {
"apikey": null, "apikey": null,
"autosubmit": false "autosubmit": false,
"allow_auto_trigger": false
}, },
"PhishingInitiative": { "PhishingInitiative": {
"apikey": null, "apikey": null,
"autosubmit": false "autosubmit": false,
"allow_auto_trigger": false
}, },
"SaneJS": { "SaneJS": {
"enabled": true "enabled": true,
"allow_auto_trigger": true
}, },
"MISP": { "MISP": {
"apikey": null, "apikey": null,
@ -18,16 +21,19 @@
"enable_lookup": false, "enable_lookup": false,
"enable_push": false, "enable_push": false,
"default_tags": [], "default_tags": [],
"auto_publish": false "auto_publish": false,
"allow_auto_trigger": false
}, },
"UniversalWhois": { "UniversalWhois": {
"enabled": false, "enabled": false,
"ipaddress": "127.0.0.1", "ipaddress": "127.0.0.1",
"port": 4243 "port": 4243,
"allow_auto_trigger": true
}, },
"_notes": { "_notes": {
"apikey": "null disables the module. Pass a string otherwise.", "apikey": "null disables the module. Pass a string otherwise.",
"autosubmit": "Automatically submits the URL to the 3rd party service.", "autosubmit": "Automatically submits the URL to the 3rd party service.",
"allow_auto_trigger": "Allow auto trigger per module: some (e.g. VT) can be very expensive",
"VirusTotal": "Module to query Virustotal: https://www.virustotal.com/", "VirusTotal": "Module to query Virustotal: https://www.virustotal.com/",
"PhishingInitiative": "Module to query phishing initiative: https://phishing-initiative.fr/contrib/", "PhishingInitiative": "Module to query phishing initiative: https://phishing-initiative.fr/contrib/",
"SaneJS": "Module to query SaneJS: https://github.com/Lookyloo/sanejs", "SaneJS": "Module to query SaneJS: https://github.com/Lookyloo/sanejs",

View File

@ -155,6 +155,7 @@ class Lookyloo():
ct = CrawledTree(har_files, capture_uuid) ct = CrawledTree(har_files, capture_uuid)
self._ensure_meta(capture_dir, ct) self._ensure_meta(capture_dir, ct)
self._resolve_dns(ct) self._resolve_dns(ct)
self.context.contextualize_tree(ct)
# Force update cache of the capture (takes care of the incomplete redirect key) # Force update cache of the capture (takes care of the incomplete redirect key)
self._set_capture_cache(capture_dir, force=True) self._set_capture_cache(capture_dir, force=True)
cache = self.capture_cache(capture_uuid) cache = self.capture_cache(capture_uuid)
@ -353,7 +354,7 @@ class Lookyloo():
with (capture_dir / 'categories').open('w') as f: with (capture_dir / 'categories').open('w') as f:
f.writelines(f'{t}\n' for t in current_categories) f.writelines(f'{t}\n' for t in current_categories)
def trigger_modules(self, capture_uuid: str, /, force: bool=False) -> None: def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> None:
'''Launch the 3rd party modules on a capture. '''Launch the 3rd party modules on a capture.
It uses the cached result *if* the module was triggered the same day. It uses the cached result *if* the module was triggered the same day.
The `force` flag re-triggers the module regardless of the cache.''' The `force` flag re-triggers the module regardless of the cache.'''
@ -363,19 +364,9 @@ class Lookyloo():
self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_uuid}) is cached.') self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_uuid}) is cached.')
return return
if self.pi.available: self.pi.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
if ct.redirects: self.vt.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
for redirect in ct.redirects: self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
self.pi.url_lookup(redirect, force)
else:
self.pi.url_lookup(ct.root_hartree.har.root_url, force)
if self.vt.available:
if ct.redirects:
for redirect in ct.redirects:
self.vt.url_lookup(redirect, force)
else:
self.vt.url_lookup(ct.root_hartree.har.root_url, force)
def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]: def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
'''Get the responses of the modules from the cached responses on the disk''' '''Get the responses of the modules from the cached responses on the disk'''
@ -1178,6 +1169,7 @@ class Lookyloo():
raise MissingUUID(f'Unable to find UUID {node_uuid} in {node_uuid}') raise MissingUUID(f'Unable to find UUID {node_uuid} in {node_uuid}')
known_content = self.context.find_known_content(hostnode) known_content = self.context.find_known_content(hostnode)
self.uwhois.query_whois_hostnode(hostnode)
urls: List[Dict[str, Any]] = [] urls: List[Dict[str, Any]] = []
for url in hostnode.urls: for url in hostnode.urls:

View File

@ -19,6 +19,8 @@ from pysanejs import SaneJS
from pyeupi import PyEUPI from pyeupi import PyEUPI
from pymisp import PyMISP, MISPEvent, MISPAttribute from pymisp import PyMISP, MISPEvent, MISPAttribute
from har2tree import CrawledTree, HostNode
class MISP(): class MISP():
@ -33,6 +35,7 @@ class MISP():
self.available = True self.available = True
self.enable_lookup = False self.enable_lookup = False
self.enable_push = False self.enable_push = False
self.allow_auto_trigger = False
try: try:
self.client = PyMISP(url=config['url'], key=config['apikey'], self.client = PyMISP(url=config['url'], key=config['apikey'],
ssl=config['verify_tls_cert'], timeout=config['timeout']) ssl=config['verify_tls_cert'], timeout=config['timeout'])
@ -45,6 +48,8 @@ class MISP():
self.enable_lookup = True self.enable_lookup = True
if config.get('enable_push'): if config.get('enable_push'):
self.enable_push = True self.enable_push = True
if config.get('allow_auto_trigger'):
self.allow_auto_trigger = True
self.default_tags: List[str] = config.get('default_tags') # type: ignore self.default_tags: List[str] = config.get('default_tags') # type: ignore
self.auto_publish = config.get('auto_publish') self.auto_publish = config.get('auto_publish')
self.storage_dir_misp = get_homedir() / 'misp' self.storage_dir_misp = get_homedir() / 'misp'
@ -126,6 +131,10 @@ class UniversalWhois():
return return
self.server = config.get('ipaddress') self.server = config.get('ipaddress')
self.port = config.get('port') self.port = config.get('port')
self.allow_auto_trigger = False
if config.get('allow_auto_trigger'):
self.allow_auto_trigger = True
try: try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.connect((self.server, self.port)) sock.connect((self.server, self.port))
@ -135,6 +144,27 @@ class UniversalWhois():
return return
self.available = True self.available = True
def query_whois_hostnode(self, hostnode: HostNode) -> None:
    '''Run a whois query for every identifier attached to a host node.

    Queries, in this order: each entry of `hostnode.resolved_ips` (when the
    node has that attribute), each entry of `hostnode.cnames` (when the node
    has that attribute), and finally `hostnode.name`. The values returned by
    `whois` are discarded.

    Does nothing when the module is not available. This method is called
    directly by code that does not check `available` first, and an
    unavailable module may not even have `server`/`port` set, so the guard
    has to live here.
    '''
    if not self.available:
        return
    for ip in getattr(hostnode, 'resolved_ips', ()):
        self.whois(ip)
    for cname in getattr(hostnode, 'cnames', ()):
        self.whois(cname)
    self.whois(hostnode.name)
def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
    '''Query whois for the host of the rendered node, then for each of its ancestors.

    Returns without doing anything when the module is unavailable, or when
    the call is an automatic trigger (`auto_trigger`) and this module is not
    configured to allow those. `force` is accepted so the signature matches
    the other modules; it is not used here.
    '''
    # Availability is tested first on purpose: the short-circuit keeps
    # `allow_auto_trigger` from being read on an unavailable module.
    if not self.available or (auto_trigger and not self.allow_auto_trigger):
        return None

    hartree = crawled_tree.root_hartree
    rendered_hostnode = hartree.get_host_node_by_uuid(hartree.rendered_node.hostnode_uuid)
    self.query_whois_hostnode(rendered_hostnode)
    for ancestor in rendered_hostnode.get_ancestors():
        self.query_whois_hostnode(ancestor)
    return None
def whois(self, query: str) -> str: def whois(self, query: str) -> str:
bytes_whois = b'' bytes_whois = b''
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
@ -163,6 +193,9 @@ class SaneJavaScript():
self.available = False self.available = False
return return
self.available = True self.available = True
self.allow_auto_trigger = False
if config.get('allow_auto_trigger'):
self.allow_auto_trigger = True
self.storage_dir = get_homedir() / 'sanejs' self.storage_dir = get_homedir() / 'sanejs'
self.storage_dir.mkdir(parents=True, exist_ok=True) self.storage_dir.mkdir(parents=True, exist_ok=True)
@ -231,9 +264,15 @@ class PhishingInitiative():
self.available = True self.available = True
self.autosubmit = False self.autosubmit = False
self.allow_auto_trigger = False
self.client = PyEUPI(config['apikey']) self.client = PyEUPI(config['apikey'])
if config.get('allow_auto_trigger'):
self.allow_auto_trigger = True
if config.get('autosubmit'): if config.get('autosubmit'):
self.autosubmit = True self.autosubmit = True
self.storage_dir_eupi = get_homedir() / 'eupi' self.storage_dir_eupi = get_homedir() / 'eupi'
self.storage_dir_eupi.mkdir(parents=True, exist_ok=True) self.storage_dir_eupi.mkdir(parents=True, exist_ok=True)
@ -253,6 +292,19 @@ class PhishingInitiative():
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
    '''Look up the URLs of a capture on Phishing Initiative.

    Every URL in `crawled_tree.redirects` is looked up; when there are no
    redirects, only the root URL of the capture is. `force` is handed to
    `url_lookup` unchanged.

    Returns without doing anything when the module is unavailable, or when
    the call is an automatic trigger (`auto_trigger`) and this module is not
    configured to allow those.
    '''
    if not self.available or (auto_trigger and not self.allow_auto_trigger):
        return None

    if not crawled_tree.redirects:
        # No redirect chain: fall back to the URL the capture started from.
        self.url_lookup(crawled_tree.root_hartree.har.root_url, force)
        return None

    for redirect_url in crawled_tree.redirects:
        self.url_lookup(redirect_url, force)
    return None
def url_lookup(self, url: str, force: bool=False) -> None: def url_lookup(self, url: str, force: bool=False) -> None:
'''Lookup an URL on Phishing Initiative '''Lookup an URL on Phishing Initiative
Note: force means 2 things: Note: force means 2 things:
@ -304,9 +356,15 @@ class VirusTotal():
self.available = True self.available = True
self.autosubmit = False self.autosubmit = False
self.allow_auto_trigger = False
self.client = vt.Client(config['apikey']) self.client = vt.Client(config['apikey'])
if config.get('allow_auto_trigger'):
self.allow_auto_trigger = True
if config.get('autosubmit'): if config.get('autosubmit'):
self.autosubmit = True self.autosubmit = True
self.storage_dir_vt = get_homedir() / 'vt_url' self.storage_dir_vt = get_homedir() / 'vt_url'
self.storage_dir_vt.mkdir(parents=True, exist_ok=True) self.storage_dir_vt.mkdir(parents=True, exist_ok=True)
@ -327,6 +385,19 @@ class VirusTotal():
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
    '''Look up the URLs of a capture on VT.

    Every URL in `crawled_tree.redirects` is looked up; when there are no
    redirects, only the root URL of the capture is. `force` is handed to
    `url_lookup` unchanged.

    Returns without doing anything when the module is unavailable, or when
    the call is an automatic trigger (`auto_trigger`) and this module is not
    configured to allow those.
    '''
    if not self.available or (auto_trigger and not self.allow_auto_trigger):
        return None

    if not crawled_tree.redirects:
        # No redirect chain: fall back to the URL the capture started from.
        self.url_lookup(crawled_tree.root_hartree.har.root_url, force)
        return None

    for redirect_url in crawled_tree.redirects:
        self.url_lookup(redirect_url, force)
    return None
def url_lookup(self, url: str, force: bool=False) -> None: def url_lookup(self, url: str, force: bool=False) -> None:
'''Lookup an URL on VT '''Lookup an URL on VT
Note: force means 2 things: Note: force means 2 things:

View File

@ -284,10 +284,11 @@ def rebuild_tree(tree_uuid: str):
return redirect(url_for('index')) return redirect(url_for('index'))
@app.route('/tree/<string:tree_uuid>/trigger_modules/', defaults={'force': False}) @app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/trigger_modules/<int:force>', methods=['GET']) def trigger_modules(tree_uuid: str):
def trigger_modules(tree_uuid: str, force: int): force = True if request.args.get('force') else False
lookyloo.trigger_modules(tree_uuid, True if force else False) auto_trigger = True if request.args.get('auto_trigger') else False
lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)
return redirect(url_for('modules', tree_uuid=tree_uuid)) return redirect(url_for('modules', tree_uuid=tree_uuid))
@ -512,7 +513,6 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
try: try:
ct = lookyloo.get_crawled_tree(tree_uuid) ct = lookyloo.get_crawled_tree(tree_uuid)
ct = lookyloo.context.contextualize_tree(ct)
b64_thumbnail = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=True) b64_thumbnail = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=True)
screenshot_size = lookyloo.get_screenshot(tree_uuid).getbuffer().nbytes screenshot_size = lookyloo.get_screenshot(tree_uuid).getbuffer().nbytes
meta = lookyloo.get_meta(tree_uuid) meta = lookyloo.get_meta(tree_uuid)

View File

@ -96,7 +96,7 @@
{% if auto_trigger_modules %} {% if auto_trigger_modules %}
<script> <script>
$.get("{{ url_for('trigger_modules', tree_uuid=tree_uuid) }}") $.get("{{ url_for('trigger_modules', tree_uuid=tree_uuid, auto_trigger=True) }}")
</script> </script>
{% endif%} {% endif%}
{% endblock %} {% endblock %}