From 172c54bae61a8bea7abd0455d7b9bd03e2284f80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 22 May 2024 00:38:35 +0200 Subject: [PATCH] new: fast internal cache for index --- lookyloo/capturecache.py | 6 +++++- lookyloo/lookyloo.py | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index ab59029..5202982 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -28,7 +28,7 @@ from pyipasnhistory import IPASNHistory # type: ignore[attr-defined] from redis import Redis from .context import Context -from .helpers import get_captures_dir, is_locked +from .helpers import get_captures_dir, is_locked, make_ts_from_dirname from .indexing import Indexing from .default import LookylooException, try_make_file, get_config from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, TreeNeedsRebuild @@ -260,11 +260,13 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] return None p = self.redis.pipeline() has_new_cached_captures = False + recent_captures = {} for uuid, directory in self.redis.hscan_iter('lookup_dirs'): if uuid in self.__cache: continue has_new_cached_captures = True p.hgetall(directory) + recent_captures[uuid] = make_ts_from_dirname(directory.rsplit('/', 1)[-1]).timestamp() if not has_new_cached_captures: return for cache in p.execute(): @@ -276,6 +278,7 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] self.logger.warning(f'Unable to initialize the cache: {e}') continue self.__cache[cc.uuid] = cc + self.redis.zadd('recent_captures', recent_captures) def _get_capture_dir(self, uuid: str) -> str: # Try to get from the recent captures cache in redis @@ -285,6 +288,7 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] return capture_dir # The capture was either removed or archived, cleaning up self.redis.hdel('lookup_dirs', uuid) + self.redis.zrem('recent_captures', uuid) self.redis.delete(capture_dir) # Try to get from the archived captures cache in redis diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 01f3c19..ebfeac7 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -58,7 +58,7 @@ from .exceptions import (MissingCaptureDirectory, from .helpers import (get_captures_dir, get_email_template, get_resources_hashes, get_taxonomies, uniq_domains, ParsedUserAgent, load_cookies, UserAgents, - get_useragent_for_requests, make_ts_from_dirname, load_takedown_filters + get_useragent_for_requests, load_takedown_filters ) from .modules import (MISPs, PhishingInitiative, UniversalWhois, UrlScan, VirusTotal, Phishtank, Hashlookup, @@ -510,8 +510,7 @@ class Lookyloo(): index_cut_time = cut_time if capture_uuids is None: - capture_uuids = {uuid for uuid, directory in self.redis.hscan_iter('lookup_dirs') - if make_ts_from_dirname(directory.rsplit('/', 1)[-1]) > index_cut_time} + capture_uuids = self.redis.zrevrangebyscore('recent_captures', '+inf', index_cut_time.timestamp()) # NOTE: we absolutely have to respect the cached_captures_only setting and # never overwrite it. This method is called to display the index # and if we try to display everything, including the non-cached entries, @@ -1503,3 +1502,4 @@ class Lookyloo(): _fw.write(favicon) self.redis.hset('lookup_dirs', uuid, str(dirpath)) + self.redis.zadd('recent_captures', {uuid: now.timestamp()})