From b1d30cadcf827ec9ac069da3910d6ec7d37d7d0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Fri, 29 Jul 2022 11:02:51 +0200
Subject: [PATCH] fix: Attempt to fix the stats page

---
 lookyloo/capturecache.py | 15 ++++-----------
 lookyloo/lookyloo.py     | 15 ++++++++++++++-
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py
index 290329e..2b464d1 100644
--- a/lookyloo/capturecache.py
+++ b/lookyloo/capturecache.py
@@ -117,6 +117,7 @@ class CapturesIndex(Mapping):
                     and not cc.incomplete_redirects):
                 self.__cache[uuid] = cc
                 return self.__cache[uuid]
+        # The tree isn't cached yet
         try:
             tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
         except TreeNeedsRebuild:
@@ -230,7 +231,7 @@ class CapturesIndex(Mapping):
             lock_file.unlink(missing_ok=True)
         return tree

-    def _set_capture_cache(self, capture_dir: Path, tree: Optional[CrawledTree]=None) -> CaptureCache:
+    def _set_capture_cache(self, capture_dir: Path, tree: CrawledTree) -> CaptureCache:
         '''Populate the redis cache for a capture. Mostly used on the index page.
         NOTE: Doesn't require the pickle.'''
         with (capture_dir / 'uuid').open() as f:
@@ -257,17 +258,9 @@ class CapturesIndex(Mapping):
             cache['timestamp'] = har.initial_start_time
             cache['url'] = har.root_url
             if har.initial_redirects and har.need_tree_redirects:
-                if not tree:
-                    # try to load tree from disk
-                    tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
                 # get redirects
-                if tree:
-                    cache['redirects'] = json.dumps(tree.redirects)
-                    cache['incomplete_redirects'] = 0
-                else:
-                    # Pickle not available
-                    cache['redirects'] = json.dumps(har.initial_redirects)
-                    cache['incomplete_redirects'] = 1
+                cache['redirects'] = json.dumps(tree.redirects)
+                cache['incomplete_redirects'] = 0
             else:
                 cache['redirects'] = json.dumps(har.initial_redirects)
                 cache['incomplete_redirects'] = 0
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index 6b4c134..7defd80 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -968,7 +968,20 @@ class Lookyloo():
         stats: Dict[int, Dict[int, Dict[str, Any]]] = {}
         weeks_stats: Dict[int, Dict] = {}

-        for cache in self.sorted_capture_cache():
+        # Load the archived captures from redis
+        archived: List[CaptureCache] = []
+        p = self.redis.pipeline()
+        for directory in self.redis.hvals('lookup_dirs_archived'):
+            p.hgetall(directory)
+        for cache in p.execute():
+            if not cache:
+                continue
+            try:
+                archived.append(CaptureCache(cache))
+            except Exception:
+                continue
+
+        for cache in self.sorted_capture_cache() + archived:
             date_submission: datetime = cache.timestamp

             if date_submission.year not in stats: