chg: Disable index cache for background processes

pull/894/head
Raphaël Vinot 2024-03-12 12:02:10 +01:00
parent df1e3228c9
commit 926c0da23e
4 changed files with 18 additions and 6 deletions

View File

@ -24,7 +24,7 @@ class BackgroundBuildCaptures(AbstractManager):
def __init__(self, loglevel: int | None=None):
super().__init__(loglevel)
self.lookyloo = Lookyloo()
self.lookyloo = Lookyloo(cache_max_size=1)
self.script_name = 'background_build_captures'
# make sure discarded captures dir exists
self.captures_dir = get_captures_dir()

View File

@ -21,7 +21,7 @@ class BackgroundIndexer(AbstractManager):
def __init__(self, full: bool=False, loglevel: int | None=None):
super().__init__(loglevel)
self.lookyloo = Lookyloo()
self.lookyloo = Lookyloo(cache_max_size=1)
self.is_public_instance = get_config('generic', 'public_instance')
self.full_indexer = full
self.indexing = Indexing(full_index=self.full_indexer)

View File

@ -13,6 +13,7 @@ import signal
import sys
import time
from collections import OrderedDict
from collections.abc import Mapping
from datetime import datetime
from functools import lru_cache, _CacheInfo as CacheInfo
@ -172,13 +173,14 @@ def serialize_sets(obj: Any) -> Any:
class CapturesIndex(Mapping): # type: ignore[type-arg]
def __init__(self, redis: Redis, contextualizer: Context | None=None) -> None: # type: ignore[type-arg]
def __init__(self, redis: Redis, contextualizer: Context | None=None, maxsize: int | None=None) -> None: # type: ignore[type-arg]
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(get_config('generic', 'loglevel'))
self.redis = redis
self.indexing = Indexing()
self.contextualizer = contextualizer
self.__cache: dict[str, CaptureCache] = {}
self.__cache_max_size = maxsize
self.__cache: dict[str, CaptureCache] = OrderedDict()
self._quick_init()
self.timeout = get_config('generic', 'max_tree_create_time')
try:
@ -203,6 +205,8 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
return set(self.__cache.keys())
def __getitem__(self, uuid: str) -> CaptureCache:
if self.__cache_max_size is not None and len(self.__cache) > self.__cache_max_size:
self.__cache.popitem()
if uuid in self.__cache:
if self.__cache[uuid].capture_dir.exists():
return self.__cache[uuid]
@ -251,6 +255,9 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
def _quick_init(self) -> None:
'''Initialize the cache with a list of UUIDs, with less back and forth with redis.
Only get recent captures.'''
if self.__cache_max_size is not None:
self.logger.info('Cache max size set, skip quick init.')
return None
p = self.redis.pipeline()
has_new_cached_captures = False
for uuid, directory in self.redis.hscan_iter('lookup_dirs'):

View File

@ -77,7 +77,12 @@ class CaptureSettings(CaptureSettingsCore, total=False):
class Lookyloo():
def __init__(self) -> None:
def __init__(self, cache_max_size: int | None=None) -> None:
'''Initialize lookyloo.
:param cache_max_size: The maximum size of the cache. Allows displaying captures metadata without getting it from redis.
This cache is *not* useful for background indexing or pickle building, only for the front end.
So it should always be None *unless* we're running the background processes.
'''
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(get_config('generic', 'loglevel'))
self.user_agents = UserAgents()
@ -149,7 +154,7 @@ class Lookyloo():
self.context = Context()
self.logger.info('Context initialized.')
self.logger.info('Initializing index...')
self._captures_index = CapturesIndex(self.redis, self.context)
self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size)
self.logger.info('Index initialized.')
# init lacus