fix: [Modules] Missing change in helpers, fix urlscan cache storage

pull/348/head
Raphaël Vinot 2022-02-03 12:49:16 +01:00
parent afc77126d4
commit 43a29f5781
3 changed files with 22 additions and 16 deletions

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import json
import logging
import os
@ -174,3 +175,12 @@ def splash_status() -> Tuple[bool, str]:
return False, f'HTTP error occurred: {http_err}'
except Exception as err:
return False, f'Other error occurred: {err}'
def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
    """Return the sharded cache directory for *identifier* below *root*.

    The identifier is hashed with MD5 and the resulting hex digest is used
    to build the path ``root[/namespace]/d0/d1/d2/digest``, where ``d0``,
    ``d1`` and ``d2`` are the first three characters of the digest.

    This function only computes the path; it does not create the directory.

    Args:
        root: Base directory of the cache.
        identifier: Arbitrary string identifying the cached entry.
        namespace: Optional sub-directory inserted directly under ``root``.
            ``None`` and the empty string both mean "no namespace".

    Returns:
        The path of the cache directory for ``identifier``.
    """
    # MD5 is only used to derive a stable directory name. Changing the hash
    # would orphan every entry that is already cached on disk.
    digest = hashlib.md5(identifier.encode()).hexdigest()
    base = root / namespace if namespace else root
    # Three single-character levels precede the full digest.
    return base.joinpath(digest[0], digest[1], digest[2], digest)

View File

@ -1,25 +1,15 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import json
from datetime import date, datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Optional, List
from har2tree import CrawledTree
from pyphishtanklookup import PhishtankLookup
from ..default import ConfigError, get_homedir
def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
    """Compute the on-disk cache directory for *identifier*.

    The directory is ``root[/namespace]/d0/d1/d2/digest``, where ``digest``
    is the MD5 hex digest of ``identifier`` and ``d0``..``d2`` are its first
    three characters.

    Nothing is created on disk; only the path is returned.

    Args:
        root: Base directory of the cache.
        identifier: String identifying the cached entry.
        namespace: Optional sub-directory placed directly under ``root``.
            A falsy value (``None`` or ``''``) means no namespace level.

    Returns:
        The computed cache directory path.
    """
    # The digest is a naming scheme, not a security measure; it must stay
    # MD5 so that previously written cache entries remain reachable.
    digest = hashlib.md5(identifier.encode()).hexdigest()
    base = root / namespace if namespace else root
    return base.joinpath(digest[0], digest[1], digest[2], digest)
from ..helpers import get_cache_directory
class Phishtank():

View File

@ -54,7 +54,8 @@ class UrlScan():
def get_url_submission(self, capture_info: Dict[str, Any]) -> Dict[str, Any]:
url_storage_dir = get_cache_directory(
self.storage_dir_urlscan,
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
'submit')
if not url_storage_dir.exists():
return {}
cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
@ -117,7 +118,8 @@ class UrlScan():
url_storage_dir = get_cache_directory(
self.storage_dir_urlscan,
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
'submit')
url_storage_dir.mkdir(parents=True, exist_ok=True)
urlscan_file_submit = url_storage_dir / date.today().isoformat()
@ -146,14 +148,18 @@ class UrlScan():
submission = self.get_url_submission(capture_info)
if submission and 'uuid' in submission:
uuid = submission['uuid']
if (self.storage_dir_urlscan / f'{uuid}.json').exists():
with (self.storage_dir_urlscan / f'{uuid}.json').open() as _f:
url_storage_dir_response = get_cache_directory(
self.storage_dir_urlscan,
f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
'response')
if (url_storage_dir_response / f'{uuid}.json').exists():
with (url_storage_dir_response / f'{uuid}.json').open() as _f:
return json.load(_f)
try:
result = self.__url_result(uuid)
except requests.exceptions.HTTPError as e:
return {'error': e}
with (self.storage_dir_urlscan / f'{uuid}.json').open('w') as _f:
with (url_storage_dir_response / f'{uuid}.json').open('w') as _f:
json.dump(result, _f)
return result
return {'error': 'Submission incomplete or unavailable.'}