fix: [Modules] Missing change in helpers, fix urlscan cache storage

pull/348/head
Raphaël Vinot 2022-02-03 12:49:16 +01:00
parent afc77126d4
commit 43a29f5781
3 changed files with 22 additions and 16 deletions

View File

@@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import hashlib
import json import json
import logging import logging
import os import os
@@ -174,3 +175,12 @@ def splash_status() -> Tuple[bool, str]:
return False, f'HTTP error occurred: {http_err}' return False, f'HTTP error occurred: {http_err}'
except Exception as err: except Exception as err:
return False, f'Other error occurred: {err}' return False, f'Other error occurred: {err}'
def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
    """Return the sharded cache directory for *identifier* below *root*.

    The identifier is hashed with MD5. The first three hex characters of the
    digest become three nested single-character directories, and the full
    digest is the name of the leaf directory. When *namespace* is truthy it
    is inserted directly under *root*, ahead of the shard directories.

    Only the path is computed; nothing is created on disk.
    """
    hexdigest = hashlib.md5(identifier.encode()).hexdigest()
    # An empty or missing namespace leaves the root untouched.
    base = root / namespace if namespace else root
    return base.joinpath(hexdigest[0], hexdigest[1], hexdigest[2], hexdigest)

View File

@@ -1,25 +1,15 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import hashlib
import json import json
from datetime import date, datetime, timedelta, timezone from datetime import date, datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Optional, List from typing import Any, Dict, Optional, List
from har2tree import CrawledTree from har2tree import CrawledTree
from pyphishtanklookup import PhishtankLookup from pyphishtanklookup import PhishtankLookup
from ..default import ConfigError, get_homedir from ..default import ConfigError, get_homedir
from ..helpers import get_cache_directory
def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
    """Compute where cached data for *identifier* lives under *root*.

    Layout: ``root[/namespace]/d0/d1/d2/digest`` where ``digest`` is the MD5
    hex digest of the encoded identifier and ``d0``..``d2`` are its first
    three characters. The namespace component is only present when
    *namespace* is truthy. The directory itself is not created here.
    """
    digest = hashlib.md5(identifier.encode()).hexdigest()
    # Collect the path components in order, then join them in one go.
    components = [namespace] if namespace else []
    components.extend(digest[:3])
    components.append(digest)
    return root.joinpath(*components)
class Phishtank(): class Phishtank():

View File

@@ -54,7 +54,8 @@ class UrlScan():
def get_url_submission(self, capture_info: Dict[str, Any]) -> Dict[str, Any]: def get_url_submission(self, capture_info: Dict[str, Any]) -> Dict[str, Any]:
url_storage_dir = get_cache_directory( url_storage_dir = get_cache_directory(
self.storage_dir_urlscan, self.storage_dir_urlscan,
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit' f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
'submit')
if not url_storage_dir.exists(): if not url_storage_dir.exists():
return {} return {}
cached_entries = sorted(url_storage_dir.glob('*'), reverse=True) cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
@@ -117,7 +118,8 @@ class UrlScan():
url_storage_dir = get_cache_directory( url_storage_dir = get_cache_directory(
self.storage_dir_urlscan, self.storage_dir_urlscan,
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit' f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
'submit')
url_storage_dir.mkdir(parents=True, exist_ok=True) url_storage_dir.mkdir(parents=True, exist_ok=True)
urlscan_file_submit = url_storage_dir / date.today().isoformat() urlscan_file_submit = url_storage_dir / date.today().isoformat()
@@ -146,14 +148,18 @@ class UrlScan():
submission = self.get_url_submission(capture_info) submission = self.get_url_submission(capture_info)
if submission and 'uuid' in submission: if submission and 'uuid' in submission:
uuid = submission['uuid'] uuid = submission['uuid']
if (self.storage_dir_urlscan / f'{uuid}.json').exists(): url_storage_dir_response = get_cache_directory(
with (self.storage_dir_urlscan / f'{uuid}.json').open() as _f: self.storage_dir_urlscan,
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
'response')
if (url_storage_dir_response / f'{uuid}.json').exists():
with (url_storage_dir_response / f'{uuid}.json').open() as _f:
return json.load(_f) return json.load(_f)
try: try:
result = self.__url_result(uuid) result = self.__url_result(uuid)
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
return {'error': e} return {'error': e}
with (self.storage_dir_urlscan / f'{uuid}.json').open('w') as _f: with (url_storage_dir_response / f'{uuid}.json').open('w') as _f:
json.dump(result, _f) json.dump(result, _f)
return result return result
return {'error': 'Submission incomplete or unavailable.'} return {'error': 'Submission incomplete or unavailable.'}