chg: Use connection pool whenever possible

pull/251/head
Raphaël Vinot 2021-08-18 18:01:04 +02:00
parent 7495e5b4f1
commit 6be9b69d95
3 changed files with 15 additions and 12 deletions

View File

@@ -61,11 +61,12 @@ class BackgroundIndexer(AbstractManager):
uuid_path.parent.rename(self.discarded_captures_dir / uuid_path.parent.name)
def _check_indexes(self):
index_redis = self.lookyloo.indexing.redis
for cache in self.lookyloo.sorted_capture_cache():
if self.lookyloo.is_public_instance and cache.no_index:
# Capture unindexed
continue
p = self.lookyloo.indexing.redis.pipeline()
p = index_redis.pipeline()
p.sismember('indexed_urls', cache.uuid)
p.sismember('indexed_body_hashes', cache.uuid)
p.sismember('indexed_cookies', cache.uuid)

View File

@@ -126,7 +126,7 @@ class Indexing():
pipeline.execute()
def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]:
capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures') # type: ignore
capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures')
entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0]
urlnode_uuid, hostnode_uuid, url = entry.split('|', 2)
return capture_uuid, urlnode_uuid, hostnode_uuid
@@ -204,7 +204,7 @@ class Indexing():
@property
def categories(self) -> List[Tuple[str, int]]:
return [(c, int(score)) # type: ignore
return [(c, int(score))
for c, score in self.redis.zrevrange('categories', 0, 200, withscores=True)]
def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]):

View File

@@ -656,11 +656,12 @@ class Lookyloo():
return to_return
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
if self.redis.zrank('to_capture', capture_uuid) is not None:
redis = self.redis # use a single connection
if redis.zrank('to_capture', capture_uuid) is not None:
return CaptureStatus.QUEUED
elif self.redis.hexists('lookup_dirs', capture_uuid):
elif redis.hexists('lookup_dirs', capture_uuid):
return CaptureStatus.DONE
elif self.redis.sismember('ongoing', capture_uuid):
elif redis.sismember('ongoing', capture_uuid):
return CaptureStatus.ONGOING
return CaptureStatus.UNKNOWN
@@ -684,7 +685,8 @@ class Lookyloo():
def process_capture_queue(self) -> Union[bool, None]:
'''Process a query from the capture queue'''
if not self.redis.exists('to_capture'):
redis = self.redis # use a single connection
if not redis.exists('to_capture'):
return None
status, message = self.splash_status()
@@ -692,18 +694,18 @@ class Lookyloo():
self.logger.critical(f'Splash is not running, unable to process the capture queue: {message}')
return None
value = self.redis.zpopmax('to_capture')
value = redis.zpopmax('to_capture')
if not value or not value[0]:
return None
uuid, score = value[0]
queue: str = self.redis.get(f'{uuid}_mgmt')
self.redis.sadd('ongoing', uuid)
queue: str = redis.get(f'{uuid}_mgmt')
redis.sadd('ongoing', uuid)
lazy_cleanup = self.redis.pipeline()
lazy_cleanup = redis.pipeline()
lazy_cleanup.delete(f'{uuid}_mgmt')
lazy_cleanup.zincrby('queues', -1, queue)
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
to_capture: Dict[str, Union[str, int, float]] = redis.hgetall(uuid)
to_capture['perma_uuid'] = uuid
if 'cookies' in to_capture:
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')