mirror of https://github.com/CIRCL/lookyloo

commit 43a29f5781 (parent afc77126d4)
fix: [Modules] Missing change in helpers, fix urlscan cache storage
lookyloo/helpers.py

@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+import hashlib
 import json
 import logging
 import os
@@ -174,3 +175,12 @@ def splash_status() -> Tuple[bool, str]:
         return False, f'HTTP error occurred: {http_err}'
     except Exception as err:
         return False, f'Other error occurred: {err}'
+
+
+def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
+    m = hashlib.md5()
+    m.update(identifier.encode())
+    digest = m.hexdigest()
+    if namespace:
+        root = root / namespace
+    return root / digest[0] / digest[1] / digest[2] / digest
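Note: a quick standalone sketch of the layout this helper produces. The cache
root and identifier below are made-up values for illustration; the helper body
is copied verbatim from the hunk above.

    import hashlib
    from pathlib import Path
    from typing import Optional

    def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
        m = hashlib.md5()
        m.update(identifier.encode())
        digest = m.hexdigest()
        if namespace:
            root = root / namespace
        return root / digest[0] / digest[1] / digest[2] / digest

    # md5('a') is '0cc175b9c0f1b6a831c399e269772661', so:
    print(get_cache_directory(Path('/tmp/cache'), 'a', 'submit'))
    # -> /tmp/cache/submit/0/c/c/0cc175b9c0f1b6a831c399e269772661
    # The three single-character levels fan entries out so that no single
    # directory accumulates an unbounded number of children.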
lookyloo/modules/phishtank.py

@@ -1,25 +1,15 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import hashlib
 import json
 from datetime import date, datetime, timedelta, timezone
-from pathlib import Path
 from typing import Any, Dict, Optional, List
 
 from har2tree import CrawledTree
 from pyphishtanklookup import PhishtankLookup
 
 from ..default import ConfigError, get_homedir
-
-
-def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] = None) -> Path:
-    m = hashlib.md5()
-    m.update(identifier.encode())
-    digest = m.hexdigest()
-    if namespace:
-        root = root / namespace
-    return root / digest[0] / digest[1] / digest[2] / digest
+from ..helpers import get_cache_directory
 
 
 class Phishtank():
lookyloo/modules/urlscan.py

@@ -54,7 +54,8 @@ class UrlScan():
     def get_url_submission(self, capture_info: Dict[str, Any]) -> Dict[str, Any]:
         url_storage_dir = get_cache_directory(
             self.storage_dir_urlscan,
-            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
+            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
+            'submit')
         if not url_storage_dir.exists():
             return {}
         cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
@@ -117,7 +118,8 @@ class UrlScan():
 
         url_storage_dir = get_cache_directory(
             self.storage_dir_urlscan,
-            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}') / 'submit'
+            f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
+            'submit')
         url_storage_dir.mkdir(parents=True, exist_ok=True)
         urlscan_file_submit = url_storage_dir / date.today().isoformat()
 
@@ -146,14 +148,18 @@ class UrlScan():
         submission = self.get_url_submission(capture_info)
         if submission and 'uuid' in submission:
             uuid = submission['uuid']
-            if (self.storage_dir_urlscan / f'{uuid}.json').exists():
-                with (self.storage_dir_urlscan / f'{uuid}.json').open() as _f:
+            url_storage_dir_response = get_cache_directory(
+                self.storage_dir_urlscan,
+                f'{capture_info["url"]}{capture_info["user_agent"]}{capture_info["referer"]}',
+                'response')
+            if (url_storage_dir_response / f'{uuid}.json').exists():
+                with (url_storage_dir_response / f'{uuid}.json').open() as _f:
                     return json.load(_f)
             try:
                 result = self.__url_result(uuid)
             except requests.exceptions.HTTPError as e:
                 return {'error': e}
-            with (self.storage_dir_urlscan / f'{uuid}.json').open('w') as _f:
+            with (url_storage_dir_response / f'{uuid}.json').open('w') as _f:
                 json.dump(result, _f)
             return result
         return {'error': 'Submission incomplete or unavailable.'}
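Note: the effect of the urlscan fix, sketched with hypothetical values. Before
this commit, submission dates were cached under a 'submit' directory placed
below the digest fan-out, while responses were dumped as flat '<uuid>.json'
files directly in self.storage_dir_urlscan. Both caches are now keyed by the
same url + user_agent + referer identifier, with the namespace at the top of
the tree:

    from pathlib import Path

    storage_dir = Path('/tmp/urlscan')  # stand-in for self.storage_dir_urlscan
    digest = '0cc175b9c0f1b6a831c399e269772661'  # sample md5 of a capture identifier
    uuid = 'sample-urlscan-submission-uuid'      # made-up submission uuid

    # Before:
    old_submit = storage_dir / digest[0] / digest[1] / digest[2] / digest / 'submit'
    old_response = storage_dir / f'{uuid}.json'  # flat, outside the fan-out

    # After: one sharded tree per namespace, shared identifier.
    new_submit = storage_dir / 'submit' / digest[0] / digest[1] / digest[2] / digest
    new_response = storage_dir / 'response' / digest[0] / digest[1] / digest[2] / digest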