mirror of https://github.com/CIRCL/lookyloo
chg: cleanup in the main lookyloo class
parent 8433cbcc1b
commit 81390d5ea0
@@ -25,7 +25,7 @@ class Archiver(AbstractManager):
         self.redis = Redis(unix_socket_path=get_socket_path('cache'))

         # make sure archived captures dir exists
-        self.archived_captures_dir = get_homedir / 'archived_captures'
+        self.archived_captures_dir = get_homedir() / 'archived_captures'
         self.archived_captures_dir.mkdir(parents=True, exist_ok=True)

         self._load_archives()
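The only change in this hunk is calling get_homedir() instead of passing the function object itself: dividing a function by a string raises a TypeError, so the archived captures directory could never be created. A minimal sketch of the difference, with a stand-in helper (the real one lives in lookyloo.helpers):

    from pathlib import Path

    def get_homedir() -> Path:
        # stand-in for lookyloo.helpers.get_homedir(); the real path comes from the config
        return Path('/opt/lookyloo')

    # before: TypeError: unsupported operand type(s) for /: 'function' and 'str'
    # archived_captures_dir = get_homedir / 'archived_captures'

    # after: a proper Path that can be created
    archived_captures_dir = get_homedir() / 'archived_captures'
    archived_captures_dir.mkdir(parents=True, exist_ok=True)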
@@ -78,8 +78,8 @@ class Archiver(AbstractManager):

         if archived_uuids:
             p = self.redis.pipeline()
-            p.redis.hdel('lookup_dirs', *archived_uuids.keys())
-            p.redis.hset('lookup_dirs_archived', mapping=archived_uuids)
+            p.hdel('lookup_dirs', *archived_uuids.keys())
+            p.hmset('lookup_dirs_archived', archived_uuids)  # type: ignore
             p.execute()
         self.logger.info('Archiving done.')
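Here the archiver stops calling the commands through p.redis (apparently a leftover; a redis-py pipeline object has no such attribute) and queues them on the pipeline itself, so the removal from lookup_dirs and the insertion into lookup_dirs_archived are sent together at execute(). A hedged sketch of the intended pattern, with placeholder data and an illustrative socket path:

    from redis import Redis

    r = Redis(unix_socket_path='/tmp/lookyloo_cache.sock')       # illustrative path
    archived_uuids = {'some-uuid': '/path/to/archived/capture'}  # placeholder data

    p = r.pipeline()
    p.hdel('lookup_dirs', *archived_uuids.keys())      # queued, not sent yet
    p.hmset('lookup_dirs_archived', archived_uuids)    # queued as well
    p.execute()                                        # both commands go out together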
@@ -91,8 +91,8 @@ class Archiver(AbstractManager):
                 if not (month / 'index').exists():
                     continue
                 with (month / 'index').open('r') as _f:
-                    archived_uuids = {uuid: str(month / dirname) for uuid, dirname in csv.reader(_f)}
-                self.redis.hset('lookup_dirs_archived', mapping=archived_uuids)
+                    archived_uuids: Dict[str, str] = {uuid: str(month / dirname) for uuid, dirname in csv.reader(_f)}
+                self.redis.hmset('lookup_dirs_archived', archived_uuids)  # type: ignore


 def main():
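Both archiver hunks also switch from hset(..., mapping=...) to hmset(...) with a type: ignore. The two spellings write the same hash fields; hmset is the older call (deprecated in recent redis-py releases in favour of hset with mapping=), so the switch presumably works around the redis-py version or type stubs pinned at the time. Equivalent writes, as a sketch:

    from redis import Redis

    r = Redis(unix_socket_path='/tmp/lookyloo_cache.sock')   # illustrative path
    archived_uuids = {'uuid-1': '/archive/2021/03/uuid-1'}   # placeholder mapping

    r.hmset('lookup_dirs_archived', archived_uuids)          # older API, still works
    r.hset('lookup_dirs_archived', mapping=archived_uuids)   # newer equivalent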
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import json
+import logging
+from collections import Counter
+from datetime import timedelta, date
+from typing import Dict, Any
+
+from redis import Redis
+from werkzeug.useragents import UserAgent
+
+from lookyloo.abstractmanager import AbstractManager
+from lookyloo.helpers import (get_config, get_homedir, get_socket_path,
+                              safe_create_dir)
+
+logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
+                    level=logging.INFO, datefmt='%I:%M:%S')
+
+
+class Processing(AbstractManager):
+
+    def __init__(self, loglevel: int=logging.INFO):
+        super().__init__(loglevel)
+        self.script_name = 'archiver'
+
+        self.use_own_ua = get_config('generic', 'use_user_agents_users')
+
+    def _to_run_forever(self):
+        if self.use_own_ua:
+            self._build_ua_file()
+
+    def _build_ua_file(self):
+        '''Build a file in a format compatible with the capture page'''
+        yesterday = (date.today() - timedelta(days=1))
+        self_generated_ua_file_path = get_homedir() / 'own_user_agents' / str(yesterday.year) / f'{yesterday.month:02}'
+        safe_create_dir(self_generated_ua_file_path)
+        self_generated_ua_file = self_generated_ua_file_path / f'{yesterday.isoformat()}.json'
+        if self_generated_ua_file.exists():
+            return
+        redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
+        entries = redis.zrevrange(f'user_agents|{yesterday.isoformat()}', 0, -1)
+        if not entries:
+            return
+
+        to_store: Dict[str, Any] = {'by_frequency': []}
+        uas = Counter([entry.split('|', 1)[1] for entry in entries])
+        for ua, _ in uas.most_common():
+            parsed_ua = UserAgent(ua)
+            if not parsed_ua.platform or not parsed_ua.browser:
+                continue
+            if parsed_ua.platform not in to_store:
+                to_store[parsed_ua.platform] = {}
+            if f'{parsed_ua.browser} {parsed_ua.version}' not in to_store[parsed_ua.platform]:
+                to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'] = []
+            to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'].append(parsed_ua.string)
+            to_store['by_frequency'].append({'os': parsed_ua.platform,
+                                             'browser': f'{parsed_ua.browser} {parsed_ua.version}',
+                                             'useragent': parsed_ua.string})
+        with self_generated_ua_file.open('w') as f:
+            json.dump(to_store, f, indent=2)
+
+        # Remove the UA / IP mapping.
+        redis.delete(f'user_agents|{yesterday.isoformat()}')
+
+
+def main():
+    p = Processing()
+    p.run(sleep_in_sec=3600 * 24)
+
+
+if __name__ == '__main__':
+    main()
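The new module (wired up as bin.background_processing further down in pyproject.toml) moves the user-agent aggregation out of the web process: it reads the per-day user_agents|YYYY-MM-DD sorted set, which the Lookyloo class fills with zincrby on every capture request (see the class hunk below), and writes a JSON file the capture page can consume. Note that script_name is set to 'archiver', apparently carried over from the archiver module. A rough sketch of the data flow, with made-up values and an illustrative socket path:

    from datetime import date, timedelta
    from redis import Redis

    r = Redis(unix_socket_path='/tmp/lookyloo_cache.sock', decode_responses=True)  # illustrative path
    day = (date.today() - timedelta(days=1)).isoformat()

    # What the frontend side does for each visitor (see the zincrby call in the Lookyloo hunk below):
    r.zincrby(f'user_agents|{day}', 1, '203.0.113.7|Mozilla/5.0 (Windows NT 10.0; Win64; x64) ...')

    # Processing._build_ua_file() then writes own_user_agents/<year>/<month>/<day>.json,
    # roughly shaped like:
    # {
    #   "by_frequency": [{"os": "windows", "browser": "chrome 90", "useragent": "Mozilla/5.0 ..."}],
    #   "windows": {"chrome 90": ["Mozilla/5.0 ..."]}
    # }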
@@ -103,7 +103,7 @@ Run the following command (assuming you run the code from the clonned repository


 @lru_cache(64)
-def get_capture_dir() -> Path:
+def get_captures_dir() -> Path:
     capture_dir = get_homedir() / 'scraped'
     safe_create_dir(capture_dir)
     return capture_dir
@@ -365,13 +365,15 @@ def get_useragent_for_requests():


 def reload_uuids_index() -> None:
-    recent_uuids = {}
-    for uuid_path in sorted(get_capture_dir().glob('*/uuid'), reverse=True):
+    recent_uuids: Dict[str, str] = {}
+    for uuid_path in sorted(get_captures_dir().glob('*/uuid'), reverse=True):
         with uuid_path.open() as f:
             uuid = f.read()
         recent_uuids[uuid] = str(uuid_path.parent)
+    if not recent_uuids:
+        return None
     r = Redis(unix_socket_path=get_socket_path('cache'))
     p = r.pipeline()
     p.delete('lookup_dirs')
-    p.hset('lookup_dirs', mapping=recent_uuids)
+    p.hset('lookup_dirs', mapping=recent_uuids)  # type: ignore
     p.execute()
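The early return added to reload_uuids_index() guards the fresh-install case: with no capture on disk the mapping is empty, and redis-py refuses hset with an empty mapping, so the rebuild would previously have failed instead of doing nothing. A small sketch of that failure mode (socket path illustrative):

    from redis import Redis
    from redis.exceptions import DataError

    r = Redis(unix_socket_path='/tmp/lookyloo_cache.sock')  # illustrative path
    try:
        r.hset('lookup_dirs', mapping={})  # what the old code amounted to with zero captures
    except DataError:
        pass  # redis-py rejects an empty mapping, hence the new early return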
@@ -3,8 +3,8 @@

 import os
 import base64
-from collections import defaultdict, Counter
-from datetime import datetime, date, timedelta
+from collections import defaultdict
+from datetime import datetime, date
 from email.message import EmailMessage
 from io import BufferedIOBase, BytesIO
 import ipaddress
@@ -106,37 +106,32 @@ class Lookyloo():
         today = date.today().isoformat()
         self.redis.zincrby(f'user_agents|{today}', 1, f'{remote_ip}|{user_agent}')

-    def build_ua_file(self) -> None:
-        '''Build a file in a format compatible with the capture page'''
-        yesterday = (date.today() - timedelta(days=1))
-        self_generated_ua_file_path = get_homedir() / 'own_user_agents' / str(yesterday.year) / f'{yesterday.month:02}'
-        safe_create_dir(self_generated_ua_file_path)
-        self_generated_ua_file = self_generated_ua_file_path / f'{yesterday.isoformat()}.json'
-        if self_generated_ua_file.exists():
-            return
-        entries = self.redis.zrevrange(f'user_agents|{yesterday.isoformat()}', 0, -1)
-        if not entries:
-            return
-
-        to_store: Dict[str, Any] = {'by_frequency': []}
-        uas = Counter([entry.split('|', 1)[1] for entry in entries])
-        for ua, _ in uas.most_common():
-            parsed_ua = UserAgent(ua)
-            if not parsed_ua.platform or not parsed_ua.browser:
-                continue
-            if parsed_ua.platform not in to_store:
-                to_store[parsed_ua.platform] = {}
-            if f'{parsed_ua.browser} {parsed_ua.version}' not in to_store[parsed_ua.platform]:
-                to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'] = []
-            to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'].append(parsed_ua.string)
-            to_store['by_frequency'].append({'os': parsed_ua.platform,
-                                             'browser': f'{parsed_ua.browser} {parsed_ua.version}',
-                                             'useragent': parsed_ua.string})
-        with self_generated_ua_file.open('w') as f:
-            json.dump(to_store, f, indent=2)
-
-        # Remove the UA / IP mapping.
-        self.redis.delete(f'user_agents|{yesterday.isoformat()}')
+    def _get_capture_dir(self, capture_uuid: str, /) -> Path:
+        '''Use the cache to get a capture directory from a capture UUID'''
+        capture_dir: Optional[Union[str, Path]]
+        if capture_uuid in self._captures_index:
+            capture_dir = self._captures_index[capture_uuid].capture_dir
+            if capture_dir.exists():
+                return capture_dir
+            self.redis.delete(capture_dir)
+            self._captures_index.pop(capture_uuid)
+        capture_dir = self.redis.hget('lookup_dirs', capture_uuid)
+        if capture_dir and not Path(capture_dir).exists():
+            # The capture was either removed or archived, cleaning up
+            self.redis.hdel('lookup_dirs', capture_uuid)
+            capture_dir = None
+        if not capture_dir:
+            # Try in the archive
+            capture_dir = self.redis.hget('lookup_dirs_archived', capture_uuid)
+        if not capture_dir:
+            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
+        to_return = Path(capture_dir)
+        if not to_return.exists():
+            # The capture was removed, remove the UUID
+            self.redis.hdel('lookup_dirs_archived', capture_uuid)
+            self.logger.warning(f'UUID ({capture_uuid}) linked to a missing directory ({capture_dir}). Removed now.')
+            raise NoValidHarFile(f'UUID ({capture_uuid}) linked to a missing directory ({capture_dir}). Removed now.')
+        return to_return

     def _cache_capture(self, capture_uuid: str, /) -> CrawledTree:
         '''Generate the pickle, set the cache, add capture in the indexes'''
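In this hunk the old build_ua_file() body disappears from the Lookyloo class (the logic now lives in the background processing daemon above) and _get_capture_dir() moves up in its place. The lookup goes through three layers: the in-memory _captures_index, the lookup_dirs hash, and finally lookup_dirs_archived, purging stale entries as it goes. A hypothetical caller, just to show the two failure modes:

    from lookyloo.lookyloo import Lookyloo
    from lookyloo.exceptions import MissingUUID, NoValidHarFile

    lookyloo = Lookyloo()  # hypothetical instance, as the web interface creates one
    try:
        capture_dir = lookyloo._get_capture_dir('00000000-0000-0000-0000-000000000000')  # placeholder UUID
    except MissingUUID:
        pass  # unknown UUID: neither cached nor archived
    except NoValidHarFile:
        pass  # known UUID pointing at a directory that no longer exists; the entry was just removed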
@@ -197,6 +192,85 @@ class Lookyloo():
         lock_file.unlink(missing_ok=True)
         return ct

+    def _set_capture_cache(self, capture_dir: Path) -> Dict[str, Any]:
+        '''Populate the redis cache for a capture. Mostly used on the index page.
+        NOTE: Doesn't require the pickle.'''
+        with (capture_dir / 'uuid').open() as f:
+            uuid = f.read().strip()
+
+        har_files = sorted(capture_dir.glob('*.har'))
+
+        error_cache: Dict[str, str] = {}
+        if (capture_dir / 'error.txt').exists():
+            # Something went wrong
+            with (capture_dir / 'error.txt').open() as _error:
+                content = _error.read()
+                try:
+                    error_to_cache = json.loads(content)
+                    if isinstance(error_to_cache, dict) and error_to_cache.get('details'):
+                        error_to_cache = error_to_cache.get('details')
+                except json.decoder.JSONDecodeError:
+                    # old format
+                    error_to_cache = content
+                error_cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'
+
+        fatal_error = False
+        if har_files:
+            try:
+                har = HarFile(har_files[0], uuid)
+            except Har2TreeError as e:
+                error_cache['error'] = str(e)
+                fatal_error = True
+        else:
+            error_cache['error'] = f'No har files in {capture_dir.name}'
+            fatal_error = True
+
+        if (capture_dir / 'categories').exists():
+            with (capture_dir / 'categories').open() as _categories:
+                categories = [c.strip() for c in _categories.readlines()]
+        else:
+            categories = []
+
+        p = self.redis.pipeline()
+        p.hset('lookup_dirs', uuid, str(capture_dir))
+        if error_cache:
+            if 'HTTP Error' not in error_cache['error']:
+                self.logger.warning(error_cache['error'])
+            p.hmset(str(capture_dir), error_cache)
+
+        if not fatal_error:
+            redirects = har.initial_redirects
+            incomplete_redirects = False
+            if redirects and har.need_tree_redirects:
+                # load tree from disk, get redirects
+                ct = load_pickle_tree(capture_dir)
+                if ct:
+                    redirects = ct.redirects
+                else:
+                    # Pickle not available
+                    incomplete_redirects = True
+
+            cache: Dict[str, Union[str, int]] = {'uuid': uuid,
+                                                 'title': har.initial_title,
+                                                 'timestamp': har.initial_start_time,
+                                                 'url': har.root_url,
+                                                 'redirects': json.dumps(redirects),
+                                                 'categories': json.dumps(categories),
+                                                 'capture_dir': str(capture_dir),
+                                                 'incomplete_redirects': 1 if incomplete_redirects else 0}
+            if (capture_dir / 'no_index').exists():  # If the folders claims anonymity
+                cache['no_index'] = 1
+
+            if (capture_dir / 'parent').exists():  # The capture was initiated from an other one
+                with (capture_dir / 'parent').open() as f:
+                    cache['parent'] = f.read().strip()
+
+            p.hmset(str(capture_dir), cache)
+        p.execute()
+        # If the cache is re-created for some reason, pop from the local cache.
+        self._captures_index.pop(uuid, None)
+        return cache
+
     def _build_cname_chain(self, known_cnames: Dict[str, Optional[str]], hostname) -> List[str]:
         '''Returns a list of CNAMEs starting from one hostname.
         The CNAMEs resolutions are made in `_resolve_dns`. A hostname can have a CNAME entry
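_set_capture_cache() (moved up here; the old copy is deleted further down) stores the per-capture summary as a hash keyed by the capture directory, with redirects and categories JSON-encoded and incomplete_redirects as a 0/1 flag. Reading one entry back, as a sketch with an illustrative socket path and placeholder UUID:

    import json
    from redis import Redis

    r = Redis(unix_socket_path='/tmp/lookyloo_cache.sock', decode_responses=True)
    capture_dir = r.hget('lookup_dirs', 'some-capture-uuid')
    if capture_dir:
        cached = r.hgetall(capture_dir)
        redirects = json.loads(cached.get('redirects', '[]'))
        incomplete = bool(int(cached.get('incomplete_redirects', 0)))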
@@ -259,17 +333,6 @@ class Lookyloo():
             json.dump(host_ips, f)
         return ct

-    def get_crawled_tree(self, capture_uuid: str, /) -> CrawledTree:
-        '''Get the generated tree in ETE Toolkit format.
-        Loads the pickle if it exists, creates it otherwise.'''
-        capture_dir = self._get_capture_dir(capture_uuid)
-        ct = load_pickle_tree(capture_dir)
-        if not ct:
-            ct = self._cache_capture(capture_uuid)
-        if not ct:
-            raise NoValidHarFile(f'Unable to get tree from {capture_dir}')
-        return ct
-
     def add_context(self, capture_uuid: str, /, urlnode_uuid: str, *, ressource_hash: str,
                     legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
         '''Adds context information to a capture or a URL node'''
@@ -449,85 +512,6 @@ class Lookyloo():
                 to_return[event_id].update(values)
         return to_return

-    def _set_capture_cache(self, capture_dir: Path) -> Dict[str, Any]:
-        '''Populate the redis cache for a capture. Mostly used on the index page.
-        NOTE: Doesn't require the pickle.'''
-        with (capture_dir / 'uuid').open() as f:
-            uuid = f.read().strip()
-
-        har_files = sorted(capture_dir.glob('*.har'))
-
-        error_cache: Dict[str, str] = {}
-        if (capture_dir / 'error.txt').exists():
-            # Something went wrong
-            with (capture_dir / 'error.txt').open() as _error:
-                content = _error.read()
-                try:
-                    error_to_cache = json.loads(content)
-                    if isinstance(error_to_cache, dict) and error_to_cache.get('details'):
-                        error_to_cache = error_to_cache.get('details')
-                except json.decoder.JSONDecodeError:
-                    # old format
-                    error_to_cache = content
-                error_cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'
-
-        fatal_error = False
-        if har_files:
-            try:
-                har = HarFile(har_files[0], uuid)
-            except Har2TreeError as e:
-                error_cache['error'] = str(e)
-                fatal_error = True
-        else:
-            error_cache['error'] = f'No har files in {capture_dir.name}'
-            fatal_error = True
-
-        if (capture_dir / 'categories').exists():
-            with (capture_dir / 'categories').open() as _categories:
-                categories = [c.strip() for c in _categories.readlines()]
-        else:
-            categories = []
-
-        p = self.redis.pipeline()
-        p.hset('lookup_dirs', uuid, str(capture_dir))
-        if error_cache:
-            if 'HTTP Error' not in error_cache['error']:
-                self.logger.warning(error_cache['error'])
-            p.hmset(str(capture_dir), error_cache)
-
-        if not fatal_error:
-            redirects = har.initial_redirects
-            incomplete_redirects = False
-            if redirects and har.need_tree_redirects:
-                # load tree from disk, get redirects
-                ct = load_pickle_tree(capture_dir)
-                if ct:
-                    redirects = ct.redirects
-                else:
-                    # Pickle not available
-                    incomplete_redirects = True
-
-            cache: Dict[str, Union[str, int]] = {'uuid': uuid,
-                                                 'title': har.initial_title,
-                                                 'timestamp': har.initial_start_time,
-                                                 'url': har.root_url,
-                                                 'redirects': json.dumps(redirects),
-                                                 'categories': json.dumps(categories),
-                                                 'capture_dir': str(capture_dir),
-                                                 'incomplete_redirects': 1 if incomplete_redirects else 0}
-            if (capture_dir / 'no_index').exists():  # If the folders claims anonymity
-                cache['no_index'] = 1
-
-            if (capture_dir / 'parent').exists():  # The capture was initiated from an other one
-                with (capture_dir / 'parent').open() as f:
-                    cache['parent'] = f.read().strip()
-
-            p.hmset(str(capture_dir), cache)
-        p.execute()
-        # If the cache is re-created for some reason, pop from the local cache.
-        self._captures_index.pop(uuid, None)
-        return cache
-
     def hide_capture(self, capture_uuid: str, /) -> None:
         """Add the capture in the hidden pool (not shown on the front page)
         NOTE: it won't remove the correlations until they are rebuilt.
@@ -599,32 +583,16 @@ class Lookyloo():
             self.logger.warning(f'Cache ({capture_dir}) is invalid ({e}): {json.dumps(cached, indent=2)}')
             return None

-    def _get_capture_dir(self, capture_uuid: str, /) -> Path:
-        '''Use the cache to get a capture directory from a capture UUID'''
-        capture_dir: Optional[Union[str, Path]]
-        if capture_uuid in self._captures_index:
-            capture_dir = self._captures_index[capture_uuid].capture_dir
-            if capture_dir.exists():
-                return capture_dir
-            self.redis.delete(capture_dir)
-            self._captures_index.pop(capture_uuid)
-        capture_dir = self.redis.hget('lookup_dirs', capture_uuid)
-        if capture_dir and not Path(capture_dir).exists():
-            # The capture was either removed or archived, cleaning up
-            self.redis.hdel('lookup_dirs', capture_uuid)
-            capture_dir = None
-        if not capture_dir:
-            # Try in the archive
-            capture_dir = self.redis.hget('lookup_dirs_archived', capture_uuid)
-        if not capture_dir:
-            raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
-        to_return = Path(capture_dir)
-        if not to_return.exists():
-            # The capture was removed, remove the UUID
-            self.redis.hdel('lookup_dirs_archived', capture_uuid)
-            self.logger.warning(f'UUID ({capture_uuid}) linked to a missing directory ({capture_dir}). Removed now.')
-            raise NoValidHarFile(f'UUID ({capture_uuid}) linked to a missing directory ({capture_dir}). Removed now.')
-        return to_return
+    def get_crawled_tree(self, capture_uuid: str, /) -> CrawledTree:
+        '''Get the generated tree in ETE Toolkit format.
+        Loads the pickle if it exists, creates it otherwise.'''
+        capture_dir = self._get_capture_dir(capture_uuid)
+        ct = load_pickle_tree(capture_dir)
+        if not ct:
+            ct = self._cache_capture(capture_uuid)
+        if not ct:
+            raise NoValidHarFile(f'Unable to get tree from {capture_dir}')
+        return ct

     def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
         redis = self.redis  # use a single connection
@@ -32,6 +32,7 @@ rebuild_caches = "bin.rebuild_caches:main"
 update = "bin.update:main"
 background_indexer = "bin.background_indexer:main"
 archiver = "bin.archiver:main"
+processing = "bin.background_processing:main"


 [tool.poetry.dependencies]
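The added processing entry sits with the other console scripts (presumably under [tool.poetry.scripts]), so the new daemon can be started the same way as archiver or background_indexer, e.g. with poetry run processing. The generated entry point boils down to something like:

    # Rough equivalent of the generated console script (an assumption, not code from the repository):
    from bin.background_processing import main

    if __name__ == '__main__':
        main()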
@@ -21,7 +21,7 @@ from werkzeug.security import check_password_hash

 from pymisp import MISPEvent, MISPServerError

-from lookyloo.helpers import (update_user_agents, get_user_agents, get_config,
+from lookyloo.helpers import (get_user_agents, get_config,
                               get_taxonomies, load_cookies, CaptureStatus)
 from lookyloo.lookyloo import Lookyloo, Indexing
 from lookyloo.exceptions import NoValidHarFile, MissingUUID
@@ -679,10 +679,6 @@ def index():
     if request.method == 'HEAD':
         # Just returns ack if the webserver is running
         return 'Ack'
-    if use_own_ua:
-        lookyloo.build_ua_file()
-    else:
-        update_user_agents()
     show_error, category = get_index_params(request)
     return index_generic(show_error=show_error)
