mirror of https://github.com/CIRCL/lookyloo
fix: Match compressed HAR as valid for rebuild
parent
dcc6b158b0
commit
a27683f090
|
@ -129,7 +129,7 @@ class Archiver(AbstractManager):
|
||||||
for index in self.archived_captures_dir.rglob('index'):
|
for index in self.archived_captures_dir.rglob('index'):
|
||||||
with index.open('r') as _f:
|
with index.open('r') as _f:
|
||||||
for uuid, dirname in csv.reader(_f):
|
for uuid, dirname in csv.reader(_f):
|
||||||
for har in (index.parent / dirname).glob('*.har'):
|
for har in (index.parent / dirname).rglob('*.har'):
|
||||||
if not har.exists():
|
if not har.exists():
|
||||||
continue
|
continue
|
||||||
with har.open('rb') as f_in:
|
with har.open('rb') as f_in:
|
||||||
|
|
|
@ -30,7 +30,9 @@ class BackgroundIndexer(AbstractManager):
|
||||||
|
|
||||||
def _build_missing_pickles(self):
|
def _build_missing_pickles(self):
|
||||||
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
|
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
|
||||||
if (uuid_path.parent / 'tree.pickle').exists() or not list(uuid_path.parent.rglob('*.har')):
|
if ((uuid_path.parent / 'tree.pickle').exists()
|
||||||
|
or not list(uuid_path.parent.rglob('*.har'))
|
||||||
|
or not list(uuid_path.parent.rglob('*.har.gz'))):
|
||||||
continue
|
continue
|
||||||
lock_file = uuid_path.parent / 'lock'
|
lock_file = uuid_path.parent / 'lock'
|
||||||
if lock_file.exists():
|
if lock_file.exists():
|
||||||
|
|
|
@ -89,7 +89,7 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int) -> CrawledTree:
|
||||||
remove_pickle_tree(capture_dir)
|
remove_pickle_tree(capture_dir)
|
||||||
except Exception:
|
except Exception:
|
||||||
remove_pickle_tree(capture_dir)
|
remove_pickle_tree(capture_dir)
|
||||||
if list(capture_dir.rglob('*.har')):
|
if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
|
||||||
raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
|
raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
|
||||||
# The tree doesn't need to be rebuilt if there are no HAR files.
|
# The tree doesn't need to be rebuilt if there are no HAR files.
|
||||||
raise NoValidHarFile("Couldn't find HAR files")
|
raise NoValidHarFile("Couldn't find HAR files")
|
||||||
|
@ -208,7 +208,8 @@ class CapturesIndex(Mapping):
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
|
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
|
||||||
|
|
||||||
har_files = sorted(capture_dir.glob('*.har'))
|
if not (har_files := sorted(capture_dir.glob('*.har'))):
|
||||||
|
har_files = sorted(capture_dir.glob('*.har.gz'))
|
||||||
try:
|
try:
|
||||||
tree = CrawledTree(har_files, uuid)
|
tree = CrawledTree(har_files, uuid)
|
||||||
self.__resolve_dns(tree)
|
self.__resolve_dns(tree)
|
||||||
|
@ -269,7 +270,9 @@ class CapturesIndex(Mapping):
|
||||||
error_to_cache = content
|
error_to_cache = content
|
||||||
cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'
|
cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'
|
||||||
|
|
||||||
if (har_files := sorted(capture_dir.rglob('*.har'))):
|
if not (har_files := sorted(capture_dir.rglob('*.har'))):
|
||||||
|
har_files = sorted(capture_dir.rglob('*.har.gz'))
|
||||||
|
if har_files:
|
||||||
try:
|
try:
|
||||||
har = HarFile(har_files[0], uuid)
|
har = HarFile(har_files[0], uuid)
|
||||||
cache['title'] = har.initial_title
|
cache['title'] = har.initial_title
|
||||||
|
|
Loading…
Reference in New Issue