From 926c0da23ef5af1f869a3079c18d76fe5168f9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 12 Mar 2024 12:02:10 +0100 Subject: [PATCH] chg: Disable index cache for background processes --- bin/background_build_captures.py | 2 +- bin/background_indexer.py | 2 +- lookyloo/capturecache.py | 11 +++++++++-- lookyloo/lookyloo.py | 9 +++++++-- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/bin/background_build_captures.py b/bin/background_build_captures.py index 53847cb..abf27ce 100755 --- a/bin/background_build_captures.py +++ b/bin/background_build_captures.py @@ -24,7 +24,7 @@ class BackgroundBuildCaptures(AbstractManager): def __init__(self, loglevel: int | None=None): super().__init__(loglevel) - self.lookyloo = Lookyloo() + self.lookyloo = Lookyloo(cache_max_size=1) self.script_name = 'background_build_captures' # make sure discarded captures dir exists self.captures_dir = get_captures_dir() diff --git a/bin/background_indexer.py b/bin/background_indexer.py index fcbd446..df9f440 100755 --- a/bin/background_indexer.py +++ b/bin/background_indexer.py @@ -21,7 +21,7 @@ class BackgroundIndexer(AbstractManager): def __init__(self, full: bool=False, loglevel: int | None=None): super().__init__(loglevel) - self.lookyloo = Lookyloo() + self.lookyloo = Lookyloo(cache_max_size=1) self.is_public_instance = get_config('generic', 'public_instance') self.full_indexer = full self.indexing = Indexing(full_index=self.full_indexer) diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index e83589f..eca6621 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -13,6 +13,7 @@ import signal import sys import time +from collections import OrderedDict from collections.abc import Mapping from datetime import datetime from functools import lru_cache, _CacheInfo as CacheInfo @@ -172,13 +173,14 @@ def serialize_sets(obj: Any) -> Any: class CapturesIndex(Mapping): # type: ignore[type-arg] - def __init__(self, redis: 
Redis, contextualizer: Context | None=None) -> None: # type: ignore[type-arg] + def __init__(self, redis: Redis, contextualizer: Context | None=None, maxsize: int | None=None) -> None: # type: ignore[type-arg] self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(get_config('generic', 'loglevel')) self.redis = redis self.indexing = Indexing() self.contextualizer = contextualizer - self.__cache: dict[str, CaptureCache] = {} + self.__cache_max_size = maxsize + self.__cache: dict[str, CaptureCache] = OrderedDict() self._quick_init() self.timeout = get_config('generic', 'max_tree_create_time') try: @@ -203,6 +205,8 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] return set(self.__cache.keys()) def __getitem__(self, uuid: str) -> CaptureCache: + if self.__cache_max_size is not None and len(self.__cache) > self.__cache_max_size: + self.__cache.popitem() if uuid in self.__cache: if self.__cache[uuid].capture_dir.exists(): return self.__cache[uuid] @@ -251,6 +255,9 @@ class CapturesIndex(Mapping): # type: ignore[type-arg] def _quick_init(self) -> None: '''Initialize the cache with a list of UUIDs, with less back and forth with redis. Only get recent captures.''' + if self.__cache_max_size is not None: + self.logger.info('Cache max size set, skip quick init.') + return None p = self.redis.pipeline() has_new_cached_captures = False for uuid, directory in self.redis.hscan_iter('lookup_dirs'): diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index aee0b2c..8c9977b 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -77,7 +77,12 @@ class CaptureSettings(CaptureSettingsCore, total=False): class Lookyloo(): - def __init__(self) -> None: + def __init__(self, cache_max_size: int | None=None) -> None: + '''Initialize lookyloo. + :param cache_max_size: The maximum size of the cache. 
Allows to display captures metadata without getting it from redis + This cache is *not* useful for background indexing or pickle building, only for the front end. + So it should always be None *unless* we're running the background processes. + ''' self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(get_config('generic', 'loglevel')) self.user_agents = UserAgents() @@ -149,7 +154,7 @@ class Lookyloo(): self.context = Context() self.logger.info('Context initialized.') self.logger.info('Initializing index...') - self._captures_index = CapturesIndex(self.redis, self.context) + self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size) self.logger.info('Index initialized.') # init lacus