diff --git a/bin/archiver.py b/bin/archiver.py
index dbb157c1..fac00b04 100755
--- a/bin/archiver.py
+++ b/bin/archiver.py
@@ -129,7 +129,7 @@ class Archiver(AbstractManager):
         for index in self.archived_captures_dir.rglob('index'):
             with index.open('r') as _f:
                 for uuid, dirname in csv.reader(_f):
-                    for har in (index.parent / dirname).glob('*.har'):
+                    for har in (index.parent / dirname).rglob('*.har'):
                         if not har.exists():
                             continue
                         with har.open('rb') as f_in:
diff --git a/bin/background_indexer.py b/bin/background_indexer.py
index b5689f23..638542d7 100755
--- a/bin/background_indexer.py
+++ b/bin/background_indexer.py
@@ -30,7 +30,9 @@ class BackgroundIndexer(AbstractManager):
 
     def _build_missing_pickles(self):
         for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
-            if (uuid_path.parent / 'tree.pickle').exists() or not list(uuid_path.parent.rglob('*.har')):
+            if ((uuid_path.parent / 'tree.pickle').exists()
+                    or (not list(uuid_path.parent.rglob('*.har'))
+                        and not list(uuid_path.parent.rglob('*.har.gz')))):
                 continue
             lock_file = uuid_path.parent / 'lock'
             if lock_file.exists():
diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py
index bcc6d032..a19b7f32 100644
--- a/lookyloo/capturecache.py
+++ b/lookyloo/capturecache.py
@@ -89,7 +89,7 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int) -> CrawledTree:
             remove_pickle_tree(capture_dir)
     except Exception:
         remove_pickle_tree(capture_dir)
-    if list(capture_dir.rglob('*.har')):
+    if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
         raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
     # The tree doesn't need to be rebuilt if there are no HAR files.
     raise NoValidHarFile("Couldn't find HAR files")
@@ -208,7 +208,8 @@ class CapturesIndex(Mapping):
                 time.sleep(5)
             return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
 
-        har_files = sorted(capture_dir.glob('*.har'))
+        if not (har_files := sorted(capture_dir.glob('*.har'))):
+            har_files = sorted(capture_dir.glob('*.har.gz'))
         try:
             tree = CrawledTree(har_files, uuid)
             self.__resolve_dns(tree)
@@ -269,7 +270,9 @@ class CapturesIndex(Mapping):
                     error_to_cache = content
                 cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'
 
-        if (har_files := sorted(capture_dir.rglob('*.har'))):
+        if not (har_files := sorted(capture_dir.rglob('*.har'))):
+            har_files = sorted(capture_dir.rglob('*.har.gz'))
+        if har_files:
             try:
                 har = HarFile(har_files[0], uuid)
                 cache['title'] = har.initial_title
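Note on the recurring pattern above: every call site now prefers plain `*.har` files and only falls back to `*.har.gz` when none are found, since the archiver gzips HARs in archived captures. Below is a minimal standalone sketch of that fallback using only the standard library; `load_har_files` is a hypothetical helper for illustration, not part of lookyloo.

```python
import gzip
import json
from pathlib import Path


def load_har_files(capture_dir: Path) -> list[dict]:
    """Hypothetical helper: load every HAR in a capture directory,
    preferring plain files and falling back to gzipped archives."""
    # Same fallback as the diff: only look for *.har.gz when no
    # plain *.har files exist.
    if not (har_paths := sorted(capture_dir.rglob('*.har'))):
        har_paths = sorted(capture_dir.rglob('*.har.gz'))

    hars = []
    for path in har_paths:
        # Transparently decompress the archived variant.
        opener = gzip.open if path.suffix == '.gz' else open
        with opener(path, 'rt', encoding='utf-8') as f:
            hars.append(json.load(f))
    return hars
```

Checking `*.har` first keeps the common case (recent, uncompressed captures) cheap and treats the gzipped form purely as a fallback for archived captures.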