fix: Better initialization of cache, avoid missing the error message.

pull/260/head
Raphaël Vinot 2021-09-08 16:34:54 +02:00
parent 39c7d14264
commit 5abc264159
1 changed file with 34 additions and 43 deletions

View File

@@ -222,9 +222,7 @@ class Lookyloo():
         with (capture_dir / 'uuid').open() as f:
             uuid = f.read().strip()
-        har_files = sorted(capture_dir.glob('*.har'))
-        cache: Dict[str, Union[str, int]] = {}
+        cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': str(capture_dir)}
         if (capture_dir / 'error.txt').exists():
             # Something went wrong
             with (capture_dir / 'error.txt').open() as _error:
@@ -238,57 +236,50 @@ class Lookyloo():
                 error_to_cache = content
             cache['error'] = f'The capture {capture_dir.name} has an error: {error_to_cache}'
-        fatal_error = False
-        if har_files:
+        if (har_files := sorted(capture_dir.glob('*.har'))):
             try:
                 har = HarFile(har_files[0], uuid)
+                cache['title'] = har.initial_title
+                cache['timestamp'] = har.initial_start_time
+                cache['url'] = har.root_url
+                if har.initial_redirects and har.need_tree_redirects:
+                    # try to load tree from disk, get redirects
+                    if (ct := load_pickle_tree(capture_dir)):
+                        cache['redirects'] = json.dumps(ct.redirects)
+                        cache['incomplete_redirects'] = 0
+                    else:
+                        # Pickle not available
+                        cache['redirects'] = json.dumps(har.initial_redirects)
+                        cache['incomplete_redirects'] = 1
+                else:
+                    cache['redirects'] = json.dumps(har.initial_redirects)
+                    cache['incomplete_redirects'] = 0
             except Har2TreeError as e:
                 cache['error'] = str(e)
-                fatal_error = True
         else:
             cache['error'] = f'No har files in {capture_dir.name}'
-            fatal_error = True
+
+        if (cache.get('error')
+                and isinstance(cache['error'], str)
+                and 'HTTP Error' not in cache['error']):
+            self.logger.warning(cache['error'])

         if (capture_dir / 'categories').exists():
             with (capture_dir / 'categories').open() as _categories:
-                categories = [c.strip() for c in _categories.readlines()]
-        else:
-            categories = []
+                cache['categories'] = json.dumps([c.strip() for c in _categories.readlines()])

-        p = self.redis.pipeline()
-        p.hset('lookup_dirs', uuid, str(capture_dir))
-        if cache:
-            if isinstance(cache['error'], str) and 'HTTP Error' not in cache['error']:
-                self.logger.warning(cache['error'])
-            p.hmset(str(capture_dir), cache)
-        if not fatal_error:
-            redirects = har.initial_redirects
-            incomplete_redirects = False
-            if redirects and har.need_tree_redirects:
-                # load tree from disk, get redirects
-                ct = load_pickle_tree(capture_dir)
-                if ct:
-                    redirects = ct.redirects
-                else:
-                    # Pickle not available
-                    incomplete_redirects = True
-            cache = {'uuid': uuid,
-                     'title': har.initial_title,
-                     'timestamp': har.initial_start_time,
-                     'url': har.root_url,
-                     'redirects': json.dumps(redirects),
-                     'categories': json.dumps(categories),
-                     'capture_dir': str(capture_dir),
-                     'incomplete_redirects': 1 if incomplete_redirects else 0}
-            if (capture_dir / 'no_index').exists():  # If the folders claims anonymity
-                cache['no_index'] = 1
-            if (capture_dir / 'parent').exists():  # The capture was initiated from an other one
-                with (capture_dir / 'parent').open() as f:
-                    cache['parent'] = f.read().strip()
-            p.hmset(str(capture_dir), cache)
+        if (capture_dir / 'no_index').exists():
+            # If the folders claims anonymity
+            cache['no_index'] = 1
+
+        if (capture_dir / 'parent').exists():
+            # The capture was initiated from an other one
+            with (capture_dir / 'parent').open() as f:
+                cache['parent'] = f.read().strip()
+
+        p = self.redis.pipeline()
+        p.hset('lookup_dirs', uuid, str(capture_dir))
         p.hmset(str(capture_dir), cache)
         p.execute()
         self._captures_index[uuid] = CaptureCache(cache)