mirror of https://github.com/CIRCL/lookyloo
chg: Slight cleanup
parent
7f6a59a441
commit
01fff00cad
|
@ -17,12 +17,14 @@ from .modules import SaneJavaScript
|
||||||
|
|
||||||
class Context():
|
class Context():
|
||||||
|
|
||||||
def __init__(self, sanejs: SaneJavaScript):
|
def __init__(self):
|
||||||
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
||||||
self.logger.setLevel(get_config('generic', 'loglevel'))
|
self.logger.setLevel(get_config('generic', 'loglevel'))
|
||||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True)
|
self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True)
|
||||||
self.sanejs = sanejs
|
|
||||||
self._cache_known_content()
|
self._cache_known_content()
|
||||||
|
self.sanejs = SaneJavaScript(get_config('modules', 'SaneJS'))
|
||||||
|
if not self.sanejs.available:
|
||||||
|
self.logger.warning('Unable to setup the SaneJS module')
|
||||||
|
|
||||||
def clear_context(self):
|
def clear_context(self):
|
||||||
self.redis.flushdb()
|
self.redis.flushdb()
|
||||||
|
|
|
@ -39,7 +39,7 @@ from .helpers import (CaptureStatus, get_captures_dir, get_config,
|
||||||
load_pickle_tree, remove_pickle_tree, try_make_file,
|
load_pickle_tree, remove_pickle_tree, try_make_file,
|
||||||
uniq_domains)
|
uniq_domains)
|
||||||
from .indexing import Indexing
|
from .indexing import Indexing
|
||||||
from .modules import (MISP, PhishingInitiative, SaneJavaScript, UniversalWhois,
|
from .modules import (MISP, PhishingInitiative, UniversalWhois,
|
||||||
UrlScan, VirusTotal, Phishtank)
|
UrlScan, VirusTotal, Phishtank)
|
||||||
|
|
||||||
|
|
||||||
|
@ -69,10 +69,6 @@ class Lookyloo():
|
||||||
if not self.vt.available:
|
if not self.vt.available:
|
||||||
self.logger.warning('Unable to setup the VirusTotal module')
|
self.logger.warning('Unable to setup the VirusTotal module')
|
||||||
|
|
||||||
self.sanejs = SaneJavaScript(get_config('modules', 'SaneJS'))
|
|
||||||
if not self.sanejs.available:
|
|
||||||
self.logger.warning('Unable to setup the SaneJS module')
|
|
||||||
|
|
||||||
self.misp = MISP(get_config('modules', 'MISP'))
|
self.misp = MISP(get_config('modules', 'MISP'))
|
||||||
if not self.misp.available:
|
if not self.misp.available:
|
||||||
self.logger.warning('Unable to setup the MISP module')
|
self.logger.warning('Unable to setup the MISP module')
|
||||||
|
@ -89,7 +85,7 @@ class Lookyloo():
|
||||||
if not self.phishtank.available:
|
if not self.phishtank.available:
|
||||||
self.logger.warning('Unable to setup the Phishtank module')
|
self.logger.warning('Unable to setup the Phishtank module')
|
||||||
|
|
||||||
self.context = Context(self.sanejs)
|
self.context = Context()
|
||||||
self._captures_index: Dict[str, CaptureCache] = {}
|
self._captures_index: Dict[str, CaptureCache] = {}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -135,30 +131,6 @@ class Lookyloo():
|
||||||
def _cache_capture(self, capture_uuid: str, /) -> CrawledTree:
|
def _cache_capture(self, capture_uuid: str, /) -> CrawledTree:
|
||||||
'''Generate the pickle, set the cache, add capture in the indexes'''
|
'''Generate the pickle, set the cache, add capture in the indexes'''
|
||||||
|
|
||||||
def _ensure_meta(capture_dir: Path, tree: CrawledTree) -> None:
|
|
||||||
'''Make sure the meta file is present, it contains information about the User Agent used for the capture.'''
|
|
||||||
metafile = capture_dir / 'meta'
|
|
||||||
if metafile.exists():
|
|
||||||
return
|
|
||||||
ua = UserAgent(tree.root_hartree.user_agent)
|
|
||||||
to_dump = {}
|
|
||||||
if ua.platform:
|
|
||||||
to_dump['os'] = ua.platform
|
|
||||||
if ua.browser:
|
|
||||||
if ua.version:
|
|
||||||
to_dump['browser'] = f'{ua.browser} {ua.version}'
|
|
||||||
else:
|
|
||||||
to_dump['browser'] = ua.browser
|
|
||||||
if ua.language:
|
|
||||||
to_dump['language'] = ua.language
|
|
||||||
|
|
||||||
if not to_dump:
|
|
||||||
# UA not recognized
|
|
||||||
self.logger.info(f'Unable to recognize the User agent: {ua}')
|
|
||||||
to_dump['user_agent'] = ua.string
|
|
||||||
with metafile.open('w') as f:
|
|
||||||
json.dump(to_dump, f)
|
|
||||||
|
|
||||||
capture_dir = self._get_capture_dir(capture_uuid)
|
capture_dir = self._get_capture_dir(capture_uuid)
|
||||||
|
|
||||||
har_files = sorted(capture_dir.glob('*.har'))
|
har_files = sorted(capture_dir.glob('*.har'))
|
||||||
|
@ -185,7 +157,6 @@ class Lookyloo():
|
||||||
index = True
|
index = True
|
||||||
try:
|
try:
|
||||||
ct = CrawledTree(har_files, capture_uuid)
|
ct = CrawledTree(har_files, capture_uuid)
|
||||||
_ensure_meta(capture_dir, ct)
|
|
||||||
self._resolve_dns(ct)
|
self._resolve_dns(ct)
|
||||||
self.context.contextualize_tree(ct)
|
self.context.contextualize_tree(ct)
|
||||||
cache = self.capture_cache(capture_uuid)
|
cache = self.capture_cache(capture_uuid)
|
||||||
|
@ -407,10 +378,30 @@ class Lookyloo():
|
||||||
def get_meta(self, capture_uuid: str, /) -> Dict[str, str]:
|
def get_meta(self, capture_uuid: str, /) -> Dict[str, str]:
|
||||||
'''Get the meta informations from a capture (mostly, details about the User Agent used.)'''
|
'''Get the meta informations from a capture (mostly, details about the User Agent used.)'''
|
||||||
capture_dir = self._get_capture_dir(capture_uuid)
|
capture_dir = self._get_capture_dir(capture_uuid)
|
||||||
|
metafile = capture_dir / 'meta'
|
||||||
|
if metafile.exists():
|
||||||
|
with metafile.open('r') as f:
|
||||||
|
return json.load(f)
|
||||||
|
|
||||||
meta = {}
|
meta = {}
|
||||||
if (capture_dir / 'meta').exists():
|
ct = self.get_crawled_tree(capture_uuid)
|
||||||
with open((capture_dir / 'meta'), 'r') as f:
|
ua = UserAgent(ct.root_hartree.user_agent)
|
||||||
meta = json.load(f)
|
meta['user_agent'] = ua.string
|
||||||
|
if ua.platform:
|
||||||
|
meta['os'] = ua.platform
|
||||||
|
if ua.browser:
|
||||||
|
if ua.version:
|
||||||
|
meta['browser'] = f'{ua.browser} {ua.version}'
|
||||||
|
else:
|
||||||
|
meta['browser'] = ua.browser
|
||||||
|
if ua.language:
|
||||||
|
meta['language'] = ua.language
|
||||||
|
|
||||||
|
if not meta:
|
||||||
|
# UA not recognized
|
||||||
|
self.logger.info(f'Unable to recognize the User agent: {ua}')
|
||||||
|
with metafile.open('w') as f:
|
||||||
|
json.dump(meta, f)
|
||||||
return meta
|
return meta
|
||||||
|
|
||||||
def categories_capture(self, capture_uuid: str, /) -> Dict[str, Any]:
|
def categories_capture(self, capture_uuid: str, /) -> Dict[str, Any]:
|
||||||
|
|
|
@ -75,7 +75,7 @@ class Phishtank():
|
||||||
Note: It will trigger a request to phishtank every time *until* there is a hit (it's cheap), then once a day.
|
Note: It will trigger a request to phishtank every time *until* there is a hit (it's cheap), then once a day.
|
||||||
'''
|
'''
|
||||||
if not self.available:
|
if not self.available:
|
||||||
raise ConfigError('VirusTotal not available, probably no API key')
|
raise ConfigError('Phishtank not available, probably not enabled.')
|
||||||
|
|
||||||
url_storage_dir = self.__get_cache_directory(url)
|
url_storage_dir = self.__get_cache_directory(url)
|
||||||
url_storage_dir.mkdir(parents=True, exist_ok=True)
|
url_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
Loading…
Reference in New Issue