mirror of https://github.com/CIRCL/lookyloo
chg: Improve initial caching.
parent
69f65c9a87
commit
8b1e3585ea
|
@ -36,7 +36,7 @@ def main():
|
||||||
if cache.get('no_index') is not None:
|
if cache.get('no_index') is not None:
|
||||||
index = False
|
index = False
|
||||||
|
|
||||||
# NOTE: these methods do nothing if we just generated the pickle
|
# NOTE: these methods do nothing if we just generated the pickle when calling lookyloo.get_crawled_tree
|
||||||
if index:
|
if index:
|
||||||
indexing.index_cookies_capture(tree)
|
indexing.index_cookies_capture(tree)
|
||||||
indexing.index_body_hashes_capture(tree)
|
indexing.index_body_hashes_capture(tree)
|
||||||
|
|
|
@ -367,7 +367,7 @@ class Lookyloo():
|
||||||
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
|
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
def _set_capture_cache(self, capture_dir: Path, force: bool=False) -> None:
|
def _set_capture_cache(self, capture_dir: Path, force: bool=False, redis_pipeline: Optional[Redis]=None) -> None:
|
||||||
if force or not self.redis.exists(str(capture_dir)):
|
if force or not self.redis.exists(str(capture_dir)):
|
||||||
# (re)build cache
|
# (re)build cache
|
||||||
pass
|
pass
|
||||||
|
@ -404,14 +404,16 @@ class Lookyloo():
|
||||||
error_cache['error'] = f'No har files in {capture_dir.name}'
|
error_cache['error'] = f'No har files in {capture_dir.name}'
|
||||||
fatal_error = True
|
fatal_error = True
|
||||||
|
|
||||||
|
if not redis_pipeline:
|
||||||
|
p = self.redis.pipeline()
|
||||||
|
else:
|
||||||
|
p = redis_pipeline
|
||||||
|
p.hset('lookup_dirs', uuid, str(capture_dir))
|
||||||
if error_cache:
|
if error_cache:
|
||||||
self.logger.warning(error_cache['error'])
|
self.logger.warning(error_cache['error'])
|
||||||
self.redis.hmset(str(capture_dir), error_cache) # type: ignore
|
p.hmset(str(capture_dir), error_cache)
|
||||||
self.redis.hset('lookup_dirs', uuid, str(capture_dir))
|
|
||||||
|
|
||||||
if fatal_error:
|
|
||||||
return
|
|
||||||
|
|
||||||
|
if not fatal_error:
|
||||||
redirects = har.initial_redirects
|
redirects = har.initial_redirects
|
||||||
incomplete_redirects = False
|
incomplete_redirects = False
|
||||||
if redirects and har.need_tree_redirects:
|
if redirects and har.need_tree_redirects:
|
||||||
|
@ -433,8 +435,9 @@ class Lookyloo():
|
||||||
if (capture_dir / 'no_index').exists(): # If the folders claims anonymity
|
if (capture_dir / 'no_index').exists(): # If the folders claims anonymity
|
||||||
cache['no_index'] = 1
|
cache['no_index'] = 1
|
||||||
|
|
||||||
self.redis.hmset(str(capture_dir), cache) # type: ignore
|
p.hmset(str(capture_dir), cache)
|
||||||
self.redis.hset('lookup_dirs', uuid, str(capture_dir))
|
if not redis_pipeline:
|
||||||
|
p.execute()
|
||||||
|
|
||||||
def hide_capture(self, capture_uuid: str) -> None:
|
def hide_capture(self, capture_uuid: str) -> None:
|
||||||
"""Add the capture in the hidden pool (not shown on the front page)
|
"""Add the capture in the hidden pool (not shown on the front page)
|
||||||
|
@ -493,10 +496,12 @@ class Lookyloo():
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _init_existing_dumps(self) -> None:
|
def _init_existing_dumps(self) -> None:
|
||||||
|
p = self.redis.pipeline()
|
||||||
for capture_dir in self.capture_dirs:
|
for capture_dir in self.capture_dirs:
|
||||||
if capture_dir.exists():
|
if capture_dir.exists():
|
||||||
self._set_capture_cache(capture_dir)
|
self._set_capture_cache(capture_dir, redis_pipeline=p)
|
||||||
self.redis.set('cache_loaded', 1)
|
p.set('cache_loaded', 1)
|
||||||
|
p.execute()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def capture_dirs(self) -> List[Path]:
|
def capture_dirs(self) -> List[Path]:
|
||||||
|
|
Loading…
Reference in New Issue