mirror of https://github.com/CIRCL/lookyloo
fix: Better error handling with unpickling, improve logging
parent
9497060028
commit
afd383cfc3
|
@ -14,6 +14,7 @@ import time
|
||||||
from collections.abc import Mapping
|
from collections.abc import Mapping
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
|
from logging import Logger
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple, Union, Set
|
from typing import Any, Dict, List, Optional, Tuple, Union, Set
|
||||||
|
|
||||||
|
@ -77,7 +78,7 @@ class CaptureCache():
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tree(self) -> CrawledTree:
|
def tree(self) -> CrawledTree:
|
||||||
return load_pickle_tree(self.capture_dir, self.capture_dir.stat().st_mtime)
|
return load_pickle_tree(self.capture_dir, self.capture_dir.stat().st_mtime, self.logger)
|
||||||
|
|
||||||
|
|
||||||
def remove_pickle_tree(capture_dir: Path) -> None:
|
def remove_pickle_tree(capture_dir: Path) -> None:
|
||||||
|
@ -87,7 +88,7 @@ def remove_pickle_tree(capture_dir: Path) -> None:
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=256)
|
@lru_cache(maxsize=256)
|
||||||
def load_pickle_tree(capture_dir: Path, last_mod_time: int) -> CrawledTree:
|
def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> CrawledTree:
|
||||||
pickle_file = capture_dir / 'tree.pickle'
|
pickle_file = capture_dir / 'tree.pickle'
|
||||||
pickle_file_gz = capture_dir / 'tree.pickle.gz'
|
pickle_file_gz = capture_dir / 'tree.pickle.gz'
|
||||||
tree = None
|
tree = None
|
||||||
|
@ -98,19 +99,19 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int) -> CrawledTree:
|
||||||
elif pickle_file_gz.exists():
|
elif pickle_file_gz.exists():
|
||||||
with gzip.open(pickle_file_gz, 'rb') as _pg:
|
with gzip.open(pickle_file_gz, 'rb') as _pg:
|
||||||
tree = pickle.load(_pg)
|
tree = pickle.load(_pg)
|
||||||
except pickle.UnpicklingError as e:
|
except pickle.UnpicklingError:
|
||||||
|
remove_pickle_tree(capture_dir)
|
||||||
|
except EOFError:
|
||||||
|
remove_pickle_tree(capture_dir)
|
||||||
|
except Exception:
|
||||||
|
logger.exception('Unexpected exception when unpickling')
|
||||||
remove_pickle_tree(capture_dir)
|
remove_pickle_tree(capture_dir)
|
||||||
|
|
||||||
if tree:
|
if tree:
|
||||||
try:
|
if tree.root_hartree.har.path.exists():
|
||||||
if tree.root_hartree.har.path.exists():
|
return tree
|
||||||
return tree
|
else:
|
||||||
else:
|
# The capture was moved.
|
||||||
# The capture was moved.
|
|
||||||
remove_pickle_tree(capture_dir)
|
|
||||||
except EOFError as e:
|
|
||||||
remove_pickle_tree(capture_dir)
|
|
||||||
except Exception as e:
|
|
||||||
remove_pickle_tree(capture_dir)
|
remove_pickle_tree(capture_dir)
|
||||||
|
|
||||||
if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
|
if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
|
||||||
|
@ -253,7 +254,7 @@ class CapturesIndex(Mapping):
|
||||||
# The pickle is being created somewhere else, wait until it's done.
|
# The pickle is being created somewhere else, wait until it's done.
|
||||||
while is_locked(capture_dir):
|
while is_locked(capture_dir):
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
|
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime, self.logger)
|
||||||
|
|
||||||
if not (har_files := sorted(capture_dir.glob('*.har'))):
|
if not (har_files := sorted(capture_dir.glob('*.har'))):
|
||||||
har_files = sorted(capture_dir.glob('*.har.gz'))
|
har_files = sorted(capture_dir.glob('*.har.gz'))
|
||||||
|
@ -319,7 +320,7 @@ class CapturesIndex(Mapping):
|
||||||
uuid = f.read().strip()
|
uuid = f.read().strip()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
|
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime, self.logger)
|
||||||
except NoValidHarFile:
|
except NoValidHarFile:
|
||||||
self.logger.debug('Unable to rebuild the tree, the HAR files are broken.')
|
self.logger.debug('Unable to rebuild the tree, the HAR files are broken.')
|
||||||
except TreeNeedsRebuild:
|
except TreeNeedsRebuild:
|
||||||
|
@ -447,6 +448,8 @@ class CapturesIndex(Mapping):
|
||||||
|
|
||||||
_all_ips = set()
|
_all_ips = set()
|
||||||
for node in ct.root_hartree.hostname_tree.traverse():
|
for node in ct.root_hartree.hostname_tree.traverse():
|
||||||
|
if hasattr(node, 'hostname_is_ip'):
|
||||||
|
continue
|
||||||
if node.name not in host_cnames or node.name not in host_ips:
|
if node.name not in host_cnames or node.name not in host_ips:
|
||||||
host_cnames[node.name] = ''
|
host_cnames[node.name] = ''
|
||||||
host_ips[node.name] = {'v4': set(), 'v6': set()}
|
host_ips[node.name] = {'v4': set(), 'v6': set()}
|
||||||
|
|
Loading…
Reference in New Issue