lookyloo/lookyloo/modules/phishtank.py

145 lines
5.3 KiB
Python
Raw Normal View History

2021-09-16 16:33:44 +02:00
#!/usr/bin/env python3
import json
from datetime import date, datetime, timedelta, timezone
2021-09-23 13:58:40 +02:00
from typing import Any, Dict, Optional, List
2021-09-16 16:33:44 +02:00
from har2tree import CrawledTree
from pyphishtanklookup import PhishtankLookup
2021-10-18 13:06:43 +02:00
from ..default import ConfigError, get_homedir
from ..helpers import get_cache_directory
2021-09-16 16:33:44 +02:00
class Phishtank():
    '''Lookyloo module querying the Phishtank Lookup service and caching the
    responses on disk under `<homedir>/phishtank`.'''

    def __init__(self, config: Dict[str, Any]):
        '''Initialize the module from its config dict.

        Keys used: `enabled` (bool), `url` (optional custom Phishtank Lookup
        instance), `allow_auto_trigger` (bool).
        '''
        if not config.get('enabled'):
            # Module disabled: mark unavailable and skip all further setup.
            self.available = False
            return
        self.available = True
        self.allow_auto_trigger = bool(config.get('allow_auto_trigger'))
        instance_url = config.get('url')
        # Fall back to the client's default public instance when no URL is configured.
        self.client = PhishtankLookup(instance_url) if instance_url else PhishtankLookup()
        self.storage_dir_pt = get_homedir() / 'phishtank'
        self.storage_dir_pt.mkdir(parents=True, exist_ok=True)
def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
url_storage_dir = get_cache_directory(self.storage_dir_pt, url, 'url')
2021-09-16 16:33:44 +02:00
if not url_storage_dir.exists():
return None
cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
if not cached_entries:
return None
with cached_entries[0].open() as f:
return json.load(f)
2021-09-23 13:58:40 +02:00
def lookup_ips_capture(self, crawled_tree: CrawledTree) -> Dict[str, List[Dict[str, Any]]]:
with (crawled_tree.root_hartree.har.path.parent / 'ips.json').open() as f:
ips_dump = json.load(f)
to_return: Dict[str, List[Dict[str, Any]]] = {}
2022-03-31 11:30:53 +02:00
for ip in {ip for ips_list in ips_dump.values() for ip in ips_list}:
2021-09-23 13:58:40 +02:00
entry = self.get_ip_lookup(ip)
if not entry:
continue
to_return[ip] = []
for url in entry['urls']:
entry = self.get_url_lookup(url)
if entry:
to_return[ip].append(entry)
return to_return
def get_ip_lookup(self, ip: str) -> Optional[Dict[str, Any]]:
ip_storage_dir = get_cache_directory(self.storage_dir_pt, ip, 'ip')
2021-09-23 13:58:40 +02:00
if not ip_storage_dir.exists():
return None
cached_entries = sorted(ip_storage_dir.glob('*'), reverse=True)
if not cached_entries:
return None
with cached_entries[0].open() as f:
return json.load(f)
2021-09-16 16:33:44 +02:00
def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, auto_trigger: bool=False) -> Dict:
'''Run the module on all the nodes up to the final redirect'''
if not self.available:
return {'error': 'Module not available'}
if auto_trigger and not self.allow_auto_trigger:
return {'error': 'Auto trigger not allowed on module'}
# Quit if the capture is more than 70h old, the data in phishtank expire around that time.
if crawled_tree.start_time <= datetime.now(timezone.utc) - timedelta(hours=70):
return {'error': 'Capture to old, the response will be irrelevant.'}
2021-09-23 13:58:40 +02:00
# Check URLs up to the redirect
2021-09-16 16:33:44 +02:00
if crawled_tree.redirects:
for redirect in crawled_tree.redirects:
self.url_lookup(redirect)
else:
self.url_lookup(crawled_tree.root_hartree.har.root_url)
2021-09-23 13:58:40 +02:00
# Check all the IPs in the ips file of the capture
with (crawled_tree.root_hartree.har.path.parent / 'ips.json').open() as f:
ips_dump = json.load(f)
2022-03-31 11:30:53 +02:00
for ip in {ip for ips_list in ips_dump.values() for ip in ips_list}:
2021-09-23 13:58:40 +02:00
self.ip_lookup(ip)
2021-09-16 16:33:44 +02:00
return {'success': 'Module triggered'}
2021-09-23 13:58:40 +02:00
def ip_lookup(self, ip: str) -> None:
'''Lookup for the URLs related to an IP on Phishtank lookup
Note: It will trigger a request to phishtank every time *until* there is a hit (it's cheap), then once a day.
'''
if not self.available:
raise ConfigError('Phishtank not available, probably not enabled.')
ip_storage_dir = get_cache_directory(self.storage_dir_pt, ip, 'ip')
2021-09-23 13:58:40 +02:00
ip_storage_dir.mkdir(parents=True, exist_ok=True)
pt_file = ip_storage_dir / date.today().isoformat()
if pt_file.exists():
return
urls = self.client.get_urls_by_ip(ip)
if not urls:
2022-05-05 01:22:59 +02:00
try:
ip_storage_dir.rmdir()
except OSError:
# no need to print an exception.
pass
2021-09-23 13:58:40 +02:00
return
to_dump = {'ip': ip, 'urls': urls}
with pt_file.open('w') as _f:
json.dump(to_dump, _f)
for url in urls:
self.url_lookup(url)
2021-09-16 16:33:44 +02:00
def url_lookup(self, url: str) -> None:
'''Lookup an URL on Phishtank lookup
Note: It will trigger a request to phishtank every time *until* there is a hit (it's cheap), then once a day.
'''
if not self.available:
2021-09-21 18:01:32 +02:00
raise ConfigError('Phishtank not available, probably not enabled.')
2021-09-16 16:33:44 +02:00
url_storage_dir = get_cache_directory(self.storage_dir_pt, url, 'url')
2021-09-16 16:33:44 +02:00
url_storage_dir.mkdir(parents=True, exist_ok=True)
pt_file = url_storage_dir / date.today().isoformat()
if pt_file.exists():
return
url_information = self.client.get_url_entry(url)
2021-11-30 14:59:48 +01:00
if not url_information:
2021-12-01 09:37:57 +01:00
url_storage_dir.rmdir()
2021-11-30 14:59:48 +01:00
return
with pt_file.open('w') as _f:
json.dump(url_information, _f)