From 6be9b69d9552f5a4e754e66a0933b83ade6152a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Wed, 18 Aug 2021 18:01:04 +0200
Subject: [PATCH] chg: Use connection pool whenever possible

---
 bin/background_indexer.py |  3 ++-
 lookyloo/indexing.py      |  4 ++--
 lookyloo/lookyloo.py      | 20 +++++++++++---------
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/bin/background_indexer.py b/bin/background_indexer.py
index 1ed46c67..e6e3af8d 100755
--- a/bin/background_indexer.py
+++ b/bin/background_indexer.py
@@ -61,11 +61,12 @@ class BackgroundIndexer(AbstractManager):
             uuid_path.parent.rename(self.discarded_captures_dir / uuid_path.parent.name)
 
     def _check_indexes(self):
+        index_redis = self.lookyloo.indexing.redis
         for cache in self.lookyloo.sorted_capture_cache():
             if self.lookyloo.is_public_instance and cache.no_index:
                 # Capture unindexed
                 continue
-            p = self.lookyloo.indexing.redis.pipeline()
+            p = index_redis.pipeline()
             p.sismember('indexed_urls', cache.uuid)
             p.sismember('indexed_body_hashes', cache.uuid)
             p.sismember('indexed_cookies', cache.uuid)
diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py
index 67c98617..032c0ef4 100644
--- a/lookyloo/indexing.py
+++ b/lookyloo/indexing.py
@@ -126,7 +126,7 @@ class Indexing():
         pipeline.execute()
 
     def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]:
-        capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures')  # type: ignore
+        capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures')
         entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0]
         urlnode_uuid, hostnode_uuid, url = entry.split('|', 2)
         return capture_uuid, urlnode_uuid, hostnode_uuid
@@ -204,7 +204,7 @@ class Indexing():
 
     @property
     def categories(self) -> List[Tuple[str, int]]:
-        return [(c, int(score))  # type: ignore
+        return [(c, int(score))
                 for c, score in self.redis.zrevrange('categories', 0, 200, withscores=True)]
 
     def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]):
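
Note on the bin/background_indexer.py hunk: `index_redis` is bound once,
outside the loop, so the client object is not rebuilt for every cached
capture (assuming, as the subject line suggests, that `indexing.redis` is a
property handing out a client backed by a shared ConnectionPool), and the
three membership checks still travel in a single round trip through the
pipeline. A minimal sketch of that pipelining pattern, assuming redis-py;
`index_redis` and `cache_uuid` are placeholders for the objects in the hunk,
only the set names are taken from it:

    # Sketch, not Lookyloo code: batch several SISMEMBER calls into one
    # round trip; execute() returns the replies in call order.
    p = index_redis.pipeline()
    p.sismember('indexed_urls', cache_uuid)
    p.sismember('indexed_body_hashes', cache_uuid)
    p.sismember('indexed_cookies', cache_uuid)
    indexed_urls, indexed_body_hashes, indexed_cookies = p.execute()
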
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index eddb3dcf..8403da10 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -656,11 +656,12 @@ class Lookyloo():
         return to_return
 
     def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
-        if self.redis.zrank('to_capture', capture_uuid) is not None:
+        redis = self.redis  # use a single connection
+        if redis.zrank('to_capture', capture_uuid) is not None:
             return CaptureStatus.QUEUED
-        elif self.redis.hexists('lookup_dirs', capture_uuid):
+        elif redis.hexists('lookup_dirs', capture_uuid):
             return CaptureStatus.DONE
-        elif self.redis.sismember('ongoing', capture_uuid):
+        elif redis.sismember('ongoing', capture_uuid):
             return CaptureStatus.ONGOING
         return CaptureStatus.UNKNOWN
 
@@ -684,7 +685,8 @@ class Lookyloo():
 
     def process_capture_queue(self) -> Union[bool, None]:
         '''Process a query from the capture queue'''
-        if not self.redis.exists('to_capture'):
+        redis = self.redis  # use a single connection
+        if not redis.exists('to_capture'):
             return None
 
         status, message = self.splash_status()
@@ -692,18 +694,18 @@ class Lookyloo():
             self.logger.critical(f'Splash is not running, unable to process the capture queue: {message}')
             return None
 
-        value = self.redis.zpopmax('to_capture')
+        value = redis.zpopmax('to_capture')
         if not value or not value[0]:
             return None
         uuid, score = value[0]
-        queue: str = self.redis.get(f'{uuid}_mgmt')
-        self.redis.sadd('ongoing', uuid)
+        queue: str = redis.get(f'{uuid}_mgmt')
+        redis.sadd('ongoing', uuid)
 
-        lazy_cleanup = self.redis.pipeline()
+        lazy_cleanup = redis.pipeline()
         lazy_cleanup.delete(f'{uuid}_mgmt')
         lazy_cleanup.zincrby('queues', -1, queue)
 
-        to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
+        to_capture: Dict[str, Union[str, int, float]] = redis.hgetall(uuid)
         to_capture['perma_uuid'] = uuid
         if 'cookies' in to_capture:
             to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
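
Note on the lookyloo/lookyloo.py hunks: binding `redis = self.redis` at the
top of each method pays off if, as the subject line suggests, `Lookyloo.redis`
is a property that builds a fresh Redis client around a shared ConnectionPool
on every attribute access; the local variable reuses one client for the whole
method while the pool keeps reusing the underlying sockets. A self-contained
sketch of that pattern, assuming redis-py; the Example class, host and port
are placeholders, only the key names come from the hunks above:

    from redis import ConnectionPool, Redis

    class Example():
        def __init__(self):
            # One pool per instance; all clients built from it share sockets.
            self.redis_pool: ConnectionPool = ConnectionPool(
                host='127.0.0.1', port=6379, decode_responses=True)

        @property
        def redis(self) -> Redis:
            # Every attribute access builds a new client wrapper around the
            # shared pool, hence the value of hoisting it into a local.
            return Redis(connection_pool=self.redis_pool)

        def get_capture_status(self, capture_uuid: str) -> str:
            redis = self.redis  # one client for the whole method, as in the patch
            if redis.zrank('to_capture', capture_uuid) is not None:
                return 'QUEUED'
            if redis.hexists('lookup_dirs', capture_uuid):
                return 'DONE'
            if redis.sismember('ongoing', capture_uuid):
                return 'ONGOING'
            return 'UNKNOWN'
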