chg: Improve initial caching.

pull/116/head
Raphaël Vinot 2020-10-29 23:25:20 +01:00
parent 69f65c9a87
commit 8b1e3585ea
2 changed files with 35 additions and 30 deletions

View File

@@ -36,7 +36,7 @@ def main():
if cache.get('no_index') is not None: if cache.get('no_index') is not None:
index = False index = False
# NOTE: these methods do nothing if we just generated the pickle # NOTE: these methods do nothing if we just generated the pickle when calling lookyloo.get_crawled_tree
if index: if index:
indexing.index_cookies_capture(tree) indexing.index_cookies_capture(tree)
indexing.index_body_hashes_capture(tree) indexing.index_body_hashes_capture(tree)

View File

@@ -367,7 +367,7 @@ class Lookyloo():
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url) to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
return to_return return to_return
def _set_capture_cache(self, capture_dir: Path, force: bool=False) -> None: def _set_capture_cache(self, capture_dir: Path, force: bool=False, redis_pipeline: Optional[Redis]=None) -> None:
if force or not self.redis.exists(str(capture_dir)): if force or not self.redis.exists(str(capture_dir)):
# (re)build cache # (re)build cache
pass pass
@@ -404,14 +404,16 @@ class Lookyloo():
error_cache['error'] = f'No har files in {capture_dir.name}' error_cache['error'] = f'No har files in {capture_dir.name}'
fatal_error = True fatal_error = True
if not redis_pipeline:
p = self.redis.pipeline()
else:
p = redis_pipeline
p.hset('lookup_dirs', uuid, str(capture_dir))
if error_cache: if error_cache:
self.logger.warning(error_cache['error']) self.logger.warning(error_cache['error'])
self.redis.hmset(str(capture_dir), error_cache) # type: ignore p.hmset(str(capture_dir), error_cache)
self.redis.hset('lookup_dirs', uuid, str(capture_dir))
if fatal_error:
return
if not fatal_error:
redirects = har.initial_redirects redirects = har.initial_redirects
incomplete_redirects = False incomplete_redirects = False
if redirects and har.need_tree_redirects: if redirects and har.need_tree_redirects:
@@ -433,8 +435,9 @@ class Lookyloo():
if (capture_dir / 'no_index').exists(): # If the folders claims anonymity if (capture_dir / 'no_index').exists(): # If the folders claims anonymity
cache['no_index'] = 1 cache['no_index'] = 1
self.redis.hmset(str(capture_dir), cache) # type: ignore p.hmset(str(capture_dir), cache)
self.redis.hset('lookup_dirs', uuid, str(capture_dir)) if not redis_pipeline:
p.execute()
def hide_capture(self, capture_uuid: str) -> None: def hide_capture(self, capture_uuid: str) -> None:
"""Add the capture in the hidden pool (not shown on the front page) """Add the capture in the hidden pool (not shown on the front page)
@@ -493,10 +496,12 @@ class Lookyloo():
return {} return {}
def _init_existing_dumps(self) -> None: def _init_existing_dumps(self) -> None:
p = self.redis.pipeline()
for capture_dir in self.capture_dirs: for capture_dir in self.capture_dirs:
if capture_dir.exists(): if capture_dir.exists():
self._set_capture_cache(capture_dir) self._set_capture_cache(capture_dir, redis_pipeline=p)
self.redis.set('cache_loaded', 1) p.set('cache_loaded', 1)
p.execute()
@property @property
def capture_dirs(self) -> List[Path]: def capture_dirs(self) -> List[Path]: