diff --git a/lookyloo/modules/pi.py b/lookyloo/modules/pi.py
index 6abfa3a..553f59e 100644
--- a/lookyloo/modules/pi.py
+++ b/lookyloo/modules/pi.py
@@ -1,17 +1,16 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import hashlib
 import json
 import time
 from datetime import date
-from pathlib import Path
 from typing import Any, Dict, Optional
 
 from har2tree import CrawledTree
 from pyeupi import PyEUPI
 
 from ..default import ConfigError, get_homedir
+from ..helpers import get_cache_directory
 
 
 class PhishingInitiative():
@@ -35,13 +34,8 @@ class PhishingInitiative():
         self.storage_dir_eupi = get_homedir() / 'eupi'
         self.storage_dir_eupi.mkdir(parents=True, exist_ok=True)
 
-    def __get_cache_directory(self, url: str) -> Path:
-        m = hashlib.md5()
-        m.update(url.encode())
-        return self.storage_dir_eupi / m.hexdigest()
-
     def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
-        url_storage_dir = self.__get_cache_directory(url)
+        url_storage_dir = get_cache_directory(self.storage_dir_eupi, url)
         if not url_storage_dir.exists():
             return None
         cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
@@ -76,7 +70,7 @@
         if not self.available:
             raise ConfigError('PhishingInitiative not available, probably no API key')
 
-        url_storage_dir = self.__get_cache_directory(url)
+        url_storage_dir = get_cache_directory(self.storage_dir_eupi, url)
         url_storage_dir.mkdir(parents=True, exist_ok=True)
 
         pi_file = url_storage_dir / date.today().isoformat()
diff --git a/lookyloo/modules/urlscan.py b/lookyloo/modules/urlscan.py
index 7384d30..c88d4e5 100644
--- a/lookyloo/modules/urlscan.py
+++ b/lookyloo/modules/urlscan.py
@@ -1,17 +1,15 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import hashlib
 import json
 import logging
 from datetime import date
-from pathlib import Path
 from typing import Any, Dict
 
 import requests
 
 from ..default import ConfigError, get_config, get_homedir
-from ..helpers import get_useragent_for_requests
+from ..helpers import get_useragent_for_requests, get_cache_directory
 
 
 class UrlScan():
@@ -53,16 +51,10 @@
         self.storage_dir_urlscan = get_homedir() / 'urlscan'
         self.storage_dir_urlscan.mkdir(parents=True, exist_ok=True)
 
-    def __get_cache_directory(self, url: str, useragent: str, referer: str) -> Path:
-        m = hashlib.md5()
-        to_hash = f'{url}{useragent}{referer}'
-        m.update(to_hash.encode())
-        return self.storage_dir_urlscan / m.hexdigest()
-
     def get_url_submission(self, capture_info: Dict[str, Any]) -> Dict[str, Any]:
-        url_storage_dir = self.__get_cache_directory(capture_info['url'],
-                                                     capture_info['user_agent'],
-                                                     capture_info['referer']) / 'submit'
+        url_storage_dir = get_cache_directory(
+            self.storage_dir_urlscan,
+            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
         if not url_storage_dir.exists():
             return {}
         cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
@@ -123,9 +115,9 @@
         if not self.available:
             raise ConfigError('UrlScan not available, probably no API key')
 
-        url_storage_dir = self.__get_cache_directory(capture_info['url'],
-                                                     capture_info['user_agent'],
-                                                     capture_info['referer']) / 'submit'
+        url_storage_dir = get_cache_directory(
+            self.storage_dir_urlscan,
+            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
         url_storage_dir.mkdir(parents=True, exist_ok=True)
 
         urlscan_file_submit = url_storage_dir / date.today().isoformat()
diff --git a/lookyloo/modules/vt.py b/lookyloo/modules/vt.py
index 443b463..eede917 100644
--- a/lookyloo/modules/vt.py
+++ b/lookyloo/modules/vt.py
@@ -1,11 +1,9 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import hashlib
 import json
 import time
 from datetime import date
-from pathlib import Path
 from typing import Any, Dict, Optional
 
 import vt  # type: ignore
@@ -13,6 +11,7 @@
 from har2tree import CrawledTree
 from vt.error import APIError  # type: ignore
 from ..default import ConfigError, get_homedir
+from ..helpers import get_cache_directory
 
 
 class VirusTotal():
@@ -36,14 +35,8 @@
         self.storage_dir_vt = get_homedir() / 'vt_url'
         self.storage_dir_vt.mkdir(parents=True, exist_ok=True)
 
-    def __get_cache_directory(self, url: str) -> Path:
-        url_id = vt.url_id(url)
-        m = hashlib.md5()
-        m.update(url_id.encode())
-        return self.storage_dir_vt / m.hexdigest()
-
     def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
-        url_storage_dir = self.__get_cache_directory(url)
+        url_storage_dir = get_cache_directory(self.storage_dir_vt, vt.url_id(url))
         if not url_storage_dir.exists():
             return None
         cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
@@ -78,7 +71,7 @@
         if not self.available:
             raise ConfigError('VirusTotal not available, probably no API key')
 
-        url_storage_dir = self.__get_cache_directory(url)
+        url_storage_dir = get_cache_directory(self.storage_dir_vt, vt.url_id(url))
         url_storage_dir.mkdir(parents=True, exist_ok=True)
 
         vt_file = url_storage_dir / date.today().isoformat()