fix: Better error handling with unpickling, improve logging

pull/640/head
Raphaël Vinot 2023-03-16 12:45:58 +01:00
parent 9497060028
commit afd383cfc3
1 changed file with 17 additions and 14 deletions

View File

@@ -14,6 +14,7 @@ import time
from collections.abc import Mapping
from datetime import datetime
from functools import lru_cache
from logging import Logger
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, Set
@@ -77,7 +78,7 @@ class CaptureCache():
@property
def tree(self) -> CrawledTree:
return load_pickle_tree(self.capture_dir, self.capture_dir.stat().st_mtime)
return load_pickle_tree(self.capture_dir, self.capture_dir.stat().st_mtime, self.logger)
def remove_pickle_tree(capture_dir: Path) -> None:
@@ -87,7 +88,7 @@ def remove_pickle_tree(capture_dir: Path) -> None:
@lru_cache(maxsize=256)
def load_pickle_tree(capture_dir: Path, last_mod_time: int) -> CrawledTree:
def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> CrawledTree:
pickle_file = capture_dir / 'tree.pickle'
pickle_file_gz = capture_dir / 'tree.pickle.gz'
tree = None
@@ -98,20 +99,20 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int) -> CrawledTree:
elif pickle_file_gz.exists():
with gzip.open(pickle_file_gz, 'rb') as _pg:
tree = pickle.load(_pg)
except pickle.UnpicklingError as e:
except pickle.UnpicklingError:
remove_pickle_tree(capture_dir)
except EOFError:
remove_pickle_tree(capture_dir)
except Exception:
logger.exception('Unexpected exception when unpickling')
remove_pickle_tree(capture_dir)
if tree:
try:
if tree.root_hartree.har.path.exists():
return tree
else:
# The capture was moved.
remove_pickle_tree(capture_dir)
except EOFError as e:
remove_pickle_tree(capture_dir)
except Exception as e:
remove_pickle_tree(capture_dir)
if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
@@ -253,7 +254,7 @@ class CapturesIndex(Mapping):
# The pickle is being created somewhere else, wait until it's done.
while is_locked(capture_dir):
time.sleep(5)
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime, self.logger)
if not (har_files := sorted(capture_dir.glob('*.har'))):
har_files = sorted(capture_dir.glob('*.har.gz'))
@@ -319,7 +320,7 @@ class CapturesIndex(Mapping):
uuid = f.read().strip()
try:
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime, self.logger)
except NoValidHarFile:
self.logger.debug('Unable to rebuild the tree, the HAR files are broken.')
except TreeNeedsRebuild:
@@ -447,6 +448,8 @@ class CapturesIndex(Mapping):
_all_ips = set()
for node in ct.root_hartree.hostname_tree.traverse():
if hasattr(node, 'hostname_is_ip'):
continue
if node.name not in host_cnames or node.name not in host_ips:
host_cnames[node.name] = ''
host_ips[node.name] = {'v4': set(), 'v6': set()}