chg: Disable index cache for background processes

pull/894/head
Raphaël Vinot 2024-03-12 12:02:10 +01:00
parent df1e3228c9
commit 926c0da23e
4 changed files with 18 additions and 6 deletions

View File

@ -24,7 +24,7 @@ class BackgroundBuildCaptures(AbstractManager):
def __init__(self, loglevel: int | None=None): def __init__(self, loglevel: int | None=None):
super().__init__(loglevel) super().__init__(loglevel)
self.lookyloo = Lookyloo() self.lookyloo = Lookyloo(cache_max_size=1)
self.script_name = 'background_build_captures' self.script_name = 'background_build_captures'
# make sure discarded captures dir exists # make sure discarded captures dir exists
self.captures_dir = get_captures_dir() self.captures_dir = get_captures_dir()

View File

@ -21,7 +21,7 @@ class BackgroundIndexer(AbstractManager):
def __init__(self, full: bool=False, loglevel: int | None=None): def __init__(self, full: bool=False, loglevel: int | None=None):
super().__init__(loglevel) super().__init__(loglevel)
self.lookyloo = Lookyloo() self.lookyloo = Lookyloo(cache_max_size=1)
self.is_public_instance = get_config('generic', 'public_instance') self.is_public_instance = get_config('generic', 'public_instance')
self.full_indexer = full self.full_indexer = full
self.indexing = Indexing(full_index=self.full_indexer) self.indexing = Indexing(full_index=self.full_indexer)

View File

@ -13,6 +13,7 @@ import signal
import sys import sys
import time import time
from collections import OrderedDict
from collections.abc import Mapping from collections.abc import Mapping
from datetime import datetime from datetime import datetime
from functools import lru_cache, _CacheInfo as CacheInfo from functools import lru_cache, _CacheInfo as CacheInfo
@ -172,13 +173,14 @@ def serialize_sets(obj: Any) -> Any:
class CapturesIndex(Mapping): # type: ignore[type-arg] class CapturesIndex(Mapping): # type: ignore[type-arg]
def __init__(self, redis: Redis, contextualizer: Context | None=None) -> None: # type: ignore[type-arg] def __init__(self, redis: Redis, contextualizer: Context | None=None, maxsize: int | None=None) -> None: # type: ignore[type-arg]
self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(get_config('generic', 'loglevel')) self.logger.setLevel(get_config('generic', 'loglevel'))
self.redis = redis self.redis = redis
self.indexing = Indexing() self.indexing = Indexing()
self.contextualizer = contextualizer self.contextualizer = contextualizer
self.__cache: dict[str, CaptureCache] = {} self.__cache_max_size = maxsize
self.__cache: dict[str, CaptureCache] = OrderedDict()
self._quick_init() self._quick_init()
self.timeout = get_config('generic', 'max_tree_create_time') self.timeout = get_config('generic', 'max_tree_create_time')
try: try:
@ -203,6 +205,8 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
return set(self.__cache.keys()) return set(self.__cache.keys())
def __getitem__(self, uuid: str) -> CaptureCache: def __getitem__(self, uuid: str) -> CaptureCache:
if self.__cache_max_size is not None and len(self.__cache) > self.__cache_max_size:
self.__cache.popitem()
if uuid in self.__cache: if uuid in self.__cache:
if self.__cache[uuid].capture_dir.exists(): if self.__cache[uuid].capture_dir.exists():
return self.__cache[uuid] return self.__cache[uuid]
@ -251,6 +255,9 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
def _quick_init(self) -> None: def _quick_init(self) -> None:
'''Initialize the cache with a list of UUIDs, with less back and forth with redis. '''Initialize the cache with a list of UUIDs, with less back and forth with redis.
Only get recent captures.''' Only get recent captures.'''
if self.__cache_max_size is not None:
self.logger.info('Cache max size set, skip quick init.')
return None
p = self.redis.pipeline() p = self.redis.pipeline()
has_new_cached_captures = False has_new_cached_captures = False
for uuid, directory in self.redis.hscan_iter('lookup_dirs'): for uuid, directory in self.redis.hscan_iter('lookup_dirs'):

View File

@ -77,7 +77,12 @@ class CaptureSettings(CaptureSettingsCore, total=False):
class Lookyloo(): class Lookyloo():
def __init__(self) -> None: def __init__(self, cache_max_size: int | None=None) -> None:
'''Initialize lookyloo.
:param cache_max_size: The maximum size of the cache. Allows displaying captures metadata without getting it from redis
This cache is *not* useful for background indexing or pickle building, only for the front end.
So it should always be None *unless* we're running the background processes.
'''
self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(get_config('generic', 'loglevel')) self.logger.setLevel(get_config('generic', 'loglevel'))
self.user_agents = UserAgents() self.user_agents = UserAgents()
@ -149,7 +154,7 @@ class Lookyloo():
self.context = Context() self.context = Context()
self.logger.info('Context initialized.') self.logger.info('Context initialized.')
self.logger.info('Initializing index...') self.logger.info('Initializing index...')
self._captures_index = CapturesIndex(self.redis, self.context) self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size)
self.logger.info('Index initialized.') self.logger.info('Index initialized.')
# init lacus # init lacus