fix: Attempt to fix the stats page

pull/476/head
Raphaël Vinot 2022-07-29 11:02:51 +02:00
parent 9b4d56a0b6
commit b1d30cadcf
2 changed files with 18 additions and 12 deletions

View File

@@ -117,6 +117,7 @@ class CapturesIndex(Mapping):
and not cc.incomplete_redirects):
self.__cache[uuid] = cc
return self.__cache[uuid]
# The tree isn't cached yet
try:
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
except TreeNeedsRebuild:
@@ -230,7 +231,7 @@ class CapturesIndex(Mapping):
lock_file.unlink(missing_ok=True)
return tree
def _set_capture_cache(self, capture_dir: Path, tree: Optional[CrawledTree]=None) -> CaptureCache:
def _set_capture_cache(self, capture_dir: Path, tree: CrawledTree) -> CaptureCache:
'''Populate the redis cache for a capture. Mostly used on the index page.
NOTE: Doesn't require the pickle.'''
with (capture_dir / 'uuid').open() as f:
@@ -257,17 +258,9 @@
cache['timestamp'] = har.initial_start_time
cache['url'] = har.root_url
if har.initial_redirects and har.need_tree_redirects:
if not tree:
# try to load tree from disk
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
# get redirects
if tree:
cache['redirects'] = json.dumps(tree.redirects)
cache['incomplete_redirects'] = 0
else:
# Pickle not available
cache['redirects'] = json.dumps(har.initial_redirects)
cache['incomplete_redirects'] = 1
cache['redirects'] = json.dumps(tree.redirects)
cache['incomplete_redirects'] = 0
else:
cache['redirects'] = json.dumps(har.initial_redirects)
cache['incomplete_redirects'] = 0

View File

@@ -968,7 +968,20 @@ class Lookyloo():
stats: Dict[int, Dict[int, Dict[str, Any]]] = {}
weeks_stats: Dict[int, Dict] = {}
for cache in self.sorted_capture_cache():
# Load the archived captures from redis
archived: List[CaptureCache] = []
p = self.redis.pipeline()
for directory in self.redis.hvals('lookup_dirs_archived'):
p.hgetall(directory)
for cache in p.execute():
if not cache:
continue
try:
archived.append(CaptureCache(cache))
except Exception:
continue
for cache in self.sorted_capture_cache() + archived:
date_submission: datetime = cache.timestamp
if date_submission.year not in stats: