mirror of https://github.com/CIRCL/lookyloo
chg: Disable index cache for background processes
parent
df1e3228c9
commit
926c0da23e
|
@ -24,7 +24,7 @@ class BackgroundBuildCaptures(AbstractManager):
|
||||||
|
|
||||||
def __init__(self, loglevel: int | None=None):
|
def __init__(self, loglevel: int | None=None):
|
||||||
super().__init__(loglevel)
|
super().__init__(loglevel)
|
||||||
self.lookyloo = Lookyloo()
|
self.lookyloo = Lookyloo(cache_max_size=1)
|
||||||
self.script_name = 'background_build_captures'
|
self.script_name = 'background_build_captures'
|
||||||
# make sure discarded captures dir exists
|
# make sure discarded captures dir exists
|
||||||
self.captures_dir = get_captures_dir()
|
self.captures_dir = get_captures_dir()
|
||||||
|
|
|
@ -21,7 +21,7 @@ class BackgroundIndexer(AbstractManager):
|
||||||
|
|
||||||
def __init__(self, full: bool=False, loglevel: int | None=None):
|
def __init__(self, full: bool=False, loglevel: int | None=None):
|
||||||
super().__init__(loglevel)
|
super().__init__(loglevel)
|
||||||
self.lookyloo = Lookyloo()
|
self.lookyloo = Lookyloo(cache_max_size=1)
|
||||||
self.is_public_instance = get_config('generic', 'public_instance')
|
self.is_public_instance = get_config('generic', 'public_instance')
|
||||||
self.full_indexer = full
|
self.full_indexer = full
|
||||||
self.indexing = Indexing(full_index=self.full_indexer)
|
self.indexing = Indexing(full_index=self.full_indexer)
|
||||||
|
|
|
@ -13,6 +13,7 @@ import signal
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
from collections.abc import Mapping
|
from collections.abc import Mapping
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from functools import lru_cache, _CacheInfo as CacheInfo
|
from functools import lru_cache, _CacheInfo as CacheInfo
|
||||||
|
@ -172,13 +173,14 @@ def serialize_sets(obj: Any) -> Any:
|
||||||
|
|
||||||
class CapturesIndex(Mapping): # type: ignore[type-arg]
|
class CapturesIndex(Mapping): # type: ignore[type-arg]
|
||||||
|
|
||||||
def __init__(self, redis: Redis, contextualizer: Context | None=None) -> None: # type: ignore[type-arg]
|
def __init__(self, redis: Redis, contextualizer: Context | None=None, maxsize: int | None=None) -> None: # type: ignore[type-arg]
|
||||||
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
||||||
self.logger.setLevel(get_config('generic', 'loglevel'))
|
self.logger.setLevel(get_config('generic', 'loglevel'))
|
||||||
self.redis = redis
|
self.redis = redis
|
||||||
self.indexing = Indexing()
|
self.indexing = Indexing()
|
||||||
self.contextualizer = contextualizer
|
self.contextualizer = contextualizer
|
||||||
self.__cache: dict[str, CaptureCache] = {}
|
self.__cache_max_size = maxsize
|
||||||
|
self.__cache: dict[str, CaptureCache] = OrderedDict()
|
||||||
self._quick_init()
|
self._quick_init()
|
||||||
self.timeout = get_config('generic', 'max_tree_create_time')
|
self.timeout = get_config('generic', 'max_tree_create_time')
|
||||||
try:
|
try:
|
||||||
|
@ -203,6 +205,8 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
|
||||||
return set(self.__cache.keys())
|
return set(self.__cache.keys())
|
||||||
|
|
||||||
def __getitem__(self, uuid: str) -> CaptureCache:
|
def __getitem__(self, uuid: str) -> CaptureCache:
|
||||||
|
if self.__cache_max_size is not None and len(self.__cache) > self.__cache_max_size:
|
||||||
|
self.__cache.popitem()
|
||||||
if uuid in self.__cache:
|
if uuid in self.__cache:
|
||||||
if self.__cache[uuid].capture_dir.exists():
|
if self.__cache[uuid].capture_dir.exists():
|
||||||
return self.__cache[uuid]
|
return self.__cache[uuid]
|
||||||
|
@ -251,6 +255,9 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
|
||||||
def _quick_init(self) -> None:
|
def _quick_init(self) -> None:
|
||||||
'''Initialize the cache with a list of UUIDs, with less back and forth with redis.
|
'''Initialize the cache with a list of UUIDs, with less back and forth with redis.
|
||||||
Only get recent captures.'''
|
Only get recent captures.'''
|
||||||
|
if self.__cache_max_size is not None:
|
||||||
|
self.logger.info('Cache max size set, skip quick init.')
|
||||||
|
return None
|
||||||
p = self.redis.pipeline()
|
p = self.redis.pipeline()
|
||||||
has_new_cached_captures = False
|
has_new_cached_captures = False
|
||||||
for uuid, directory in self.redis.hscan_iter('lookup_dirs'):
|
for uuid, directory in self.redis.hscan_iter('lookup_dirs'):
|
||||||
|
|
|
@ -77,7 +77,12 @@ class CaptureSettings(CaptureSettingsCore, total=False):
|
||||||
|
|
||||||
class Lookyloo():
|
class Lookyloo():
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self, cache_max_size: int | None=None) -> None:
|
||||||
|
'''Initialize lookyloo.
|
||||||
|
:param cache_max_size: The maximum size of the cache. Allows displaying captures metadata without getting it from redis
|
||||||
|
This cache is *not* useful for background indexing or pickle building, only for the front end.
|
||||||
|
So it should always be None *unless* we're running the background processes.
|
||||||
|
'''
|
||||||
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
||||||
self.logger.setLevel(get_config('generic', 'loglevel'))
|
self.logger.setLevel(get_config('generic', 'loglevel'))
|
||||||
self.user_agents = UserAgents()
|
self.user_agents = UserAgents()
|
||||||
|
@ -149,7 +154,7 @@ class Lookyloo():
|
||||||
self.context = Context()
|
self.context = Context()
|
||||||
self.logger.info('Context initialized.')
|
self.logger.info('Context initialized.')
|
||||||
self.logger.info('Initializing index...')
|
self.logger.info('Initializing index...')
|
||||||
self._captures_index = CapturesIndex(self.redis, self.context)
|
self._captures_index = CapturesIndex(self.redis, self.context, maxsize=cache_max_size)
|
||||||
self.logger.info('Index initialized.')
|
self.logger.info('Index initialized.')
|
||||||
|
|
||||||
# init lacus
|
# init lacus
|
||||||
|
|
Loading…
Reference in New Issue