From 43a29f578184962a43f1c948f2205e128605c424 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Thu, 3 Feb 2022 12:49:16 +0100
Subject: [PATCH] fix: [Modules] Missing change in helpers, fix urlscan cache storage

---
Reviewer notes (free-form text between the three-dash marker and the diffstat
is not part of the commit message):

  * get_cache_directory() is moved unchanged from
    lookyloo/modules/phishtank.py to lookyloo/helpers.py; phishtank.py now
    imports it from ..helpers instead of defining its own copy.
  * The helper MD5-hashes `identifier` and returns
    root[/namespace]/<d0>/<d1>/<d2>/<digest>, where d0..d2 are the first
    three hex characters of the digest. It only builds the path; it does not
    create any directory, so callers have to mkdir() before writing.
  * In urlscan.py the 'submit' component moves from a suffix appended to the
    returned path to the `namespace` argument, i.e. from
    <root>/<d0>/<d1>/<d2>/<digest>/submit to
    <root>/submit/<d0>/<d1>/<d2>/<digest>. Entries cached under the previous
    layout are not looked up at the new location.
    NOTE(review): presumably intentional ("fix urlscan cache storage");
    confirm whether old cache entries need a migration.

 lookyloo/helpers.py           | 10 ++++++++++
 lookyloo/modules/phishtank.py | 12 +-----------
 lookyloo/modules/urlscan.py   | 16 +++++++++++-----
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py
index 98dab1e..cfb80f2 100644
--- a/lookyloo/helpers.py
+++ b/lookyloo/helpers.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+import hashlib
 import json
 import logging
 import os
@@ -174,3 +175,12 @@ def splash_status() -> Tuple[bool, str]:
         return False, f'HTTP error occurred: {http_err}'
     except Exception as err:
         return False, f'Other error occurred: {err}'
+
+
+def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
+    m = hashlib.md5()
+    m.update(identifier.encode())
+    digest = m.hexdigest()
+    if namespace:
+        root = root / namespace
+    return root / digest[0] / digest[1] / digest[2] / digest
diff --git a/lookyloo/modules/phishtank.py b/lookyloo/modules/phishtank.py
index db87414..7ae1a65 100644
--- a/lookyloo/modules/phishtank.py
+++ b/lookyloo/modules/phishtank.py
@@ -1,25 +1,15 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import hashlib
 import json
 from datetime import date, datetime, timedelta, timezone
-from pathlib import Path
 from typing import Any, Dict, Optional, List
 
 from har2tree import CrawledTree
 from pyphishtanklookup import PhishtankLookup
 
 from ..default import ConfigError, get_homedir
-
-
-def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
-    m = hashlib.md5()
-    m.update(identifier.encode())
-    digest = m.hexdigest()
-    if namespace:
-        root = root / namespace
-    return root / digest[0] / digest[1] / digest[2] / digest
+from ..helpers import get_cache_directory
 
 
 class Phishtank():
diff --git a/lookyloo/modules/urlscan.py b/lookyloo/modules/urlscan.py
index c88d4e5..40f133d 100644
--- a/lookyloo/modules/urlscan.py
+++ b/lookyloo/modules/urlscan.py
@@ -54,7 +54,8 @@ class UrlScan():
     def get_url_submission(self, capture_info: Dict[str, Any]) -> Dict[str, Any]:
         url_storage_dir = get_cache_directory(
             self.storage_dir_urlscan,
-            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
+            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
+            'submit')
         if not url_storage_dir.exists():
             return {}
         cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
@@ -117,7 +118,8 @@ class UrlScan():
 
         url_storage_dir = get_cache_directory(
             self.storage_dir_urlscan,
-            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
+            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
+            'submit')
         url_storage_dir.mkdir(parents=True, exist_ok=True)
         urlscan_file_submit = url_storage_dir / date.today().isoformat()
 
@@ -146,14 +148,20 @@ class UrlScan():
         submission = self.get_url_submission(capture_info)
         if submission and 'uuid' in submission:
             uuid = submission['uuid']
-            if (self.storage_dir_urlscan / f'{uuid}.json').exists():
-                with (self.storage_dir_urlscan / f'{uuid}.json').open() as _f:
+            url_storage_dir_response = get_cache_directory(
+                self.storage_dir_urlscan,
+                f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
+                'response')
+            # get_cache_directory() only builds the path: create it on first use or the write below fails.
+            url_storage_dir_response.mkdir(parents=True, exist_ok=True)
+            if (url_storage_dir_response / f'{uuid}.json').exists():
+                with (url_storage_dir_response / f'{uuid}.json').open() as _f:
                     return json.load(_f)
             try:
                 result = self.__url_result(uuid)
             except requests.exceptions.HTTPError as e:
                 return {'error': e}
-            with (self.storage_dir_urlscan / f'{uuid}.json').open('w') as _f:
+            with (url_storage_dir_response / f'{uuid}.json').open('w') as _f:
                 json.dump(result, _f)
             return result
         return {'error': 'Submission incomplete or unavailable.'}