fix: Match compressed HAR as valid for rebuild

pull/528/head
Raphaël Vinot 2022-09-28 11:23:44 +02:00
parent dcc6b158b0
commit a27683f090
3 changed files with 10 additions and 5 deletions

View File

@@ -129,7 +129,7 @@ class Archiver(AbstractManager):
for index in self.archived_captures_dir.rglob('index'): for index in self.archived_captures_dir.rglob('index'):
with index.open('r') as _f: with index.open('r') as _f:
for uuid, dirname in csv.reader(_f): for uuid, dirname in csv.reader(_f):
for har in (index.parent / dirname).glob('*.har'): for har in (index.parent / dirname).rglob('*.har'):
if not har.exists(): if not har.exists():
continue continue
with har.open('rb') as f_in: with har.open('rb') as f_in:

View File

@@ -30,7 +30,9 @@ class BackgroundIndexer(AbstractManager):
def _build_missing_pickles(self): def _build_missing_pickles(self):
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True): for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
if (uuid_path.parent / 'tree.pickle').exists() or not list(uuid_path.parent.rglob('*.har')): if ((uuid_path.parent / 'tree.pickle').exists()
or not list(uuid_path.parent.rglob('*.har'))
or not list(uuid_path.parent.rglob('*.har.gz'))):
continue continue
lock_file = uuid_path.parent / 'lock' lock_file = uuid_path.parent / 'lock'
if lock_file.exists(): if lock_file.exists():

View File

@@ -89,7 +89,7 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int) -> CrawledTree:
remove_pickle_tree(capture_dir) remove_pickle_tree(capture_dir)
except Exception: except Exception:
remove_pickle_tree(capture_dir) remove_pickle_tree(capture_dir)
if list(capture_dir.rglob('*.har')): if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.') raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
# The tree doesn't need to be rebuilt if there are no HAR files. # The tree doesn't need to be rebuilt if there are no HAR files.
raise NoValidHarFile("Couldn't find HAR files") raise NoValidHarFile("Couldn't find HAR files")
@@ -208,7 +208,8 @@ class CapturesIndex(Mapping):
time.sleep(5) time.sleep(5)
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime) return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
har_files = sorted(capture_dir.glob('*.har')) if not (har_files := sorted(capture_dir.glob('*.har'))):
har_files = sorted(capture_dir.glob('*.har.gz'))
try: try:
tree = CrawledTree(har_files, uuid) tree = CrawledTree(har_files, uuid)
self.__resolve_dns(tree) self.__resolve_dns(tree)
@@ -269,7 +270,9 @@
error_to_cache = content error_to_cache = content
cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}' cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'
if (har_files := sorted(capture_dir.rglob('*.har'))): if not (har_files := sorted(capture_dir.rglob('*.har'))):
har_files = sorted(capture_dir.rglob('*.har.gz'))
if har_files:
try: try:
har = HarFile(har_files[0], uuid) har = HarFile(har_files[0], uuid)
cache['title'] = har.initial_title cache['title'] = har.initial_title