mirror of https://github.com/CIRCL/lookyloo
chg: Use connection pool whenever possible
parent
7495e5b4f1
commit
6be9b69d95
|
@ -61,11 +61,12 @@ class BackgroundIndexer(AbstractManager):
|
|||
uuid_path.parent.rename(self.discarded_captures_dir / uuid_path.parent.name)
|
||||
|
||||
def _check_indexes(self):
|
||||
index_redis = self.lookyloo.indexing.redis
|
||||
for cache in self.lookyloo.sorted_capture_cache():
|
||||
if self.lookyloo.is_public_instance and cache.no_index:
|
||||
# Capture unindexed
|
||||
continue
|
||||
p = self.lookyloo.indexing.redis.pipeline()
|
||||
p = index_redis.pipeline()
|
||||
p.sismember('indexed_urls', cache.uuid)
|
||||
p.sismember('indexed_body_hashes', cache.uuid)
|
||||
p.sismember('indexed_cookies', cache.uuid)
|
||||
|
|
|
@ -126,7 +126,7 @@ class Indexing():
|
|||
pipeline.execute()
|
||||
|
||||
def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]:
|
||||
capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures') # type: ignore
|
||||
capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures')
|
||||
entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0]
|
||||
urlnode_uuid, hostnode_uuid, url = entry.split('|', 2)
|
||||
return capture_uuid, urlnode_uuid, hostnode_uuid
|
||||
|
@ -204,7 +204,7 @@ class Indexing():
|
|||
|
||||
@property
|
||||
def categories(self) -> List[Tuple[str, int]]:
|
||||
return [(c, int(score)) # type: ignore
|
||||
return [(c, int(score))
|
||||
for c, score in self.redis.zrevrange('categories', 0, 200, withscores=True)]
|
||||
|
||||
def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]):
|
||||
|
|
|
@ -656,11 +656,12 @@ class Lookyloo():
|
|||
return to_return
|
||||
|
||||
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
|
||||
if self.redis.zrank('to_capture', capture_uuid) is not None:
|
||||
redis = self.redis # use a single connection
|
||||
if redis.zrank('to_capture', capture_uuid) is not None:
|
||||
return CaptureStatus.QUEUED
|
||||
elif self.redis.hexists('lookup_dirs', capture_uuid):
|
||||
elif redis.hexists('lookup_dirs', capture_uuid):
|
||||
return CaptureStatus.DONE
|
||||
elif self.redis.sismember('ongoing', capture_uuid):
|
||||
elif redis.sismember('ongoing', capture_uuid):
|
||||
return CaptureStatus.ONGOING
|
||||
return CaptureStatus.UNKNOWN
|
||||
|
||||
|
@ -684,7 +685,8 @@ class Lookyloo():
|
|||
|
||||
def process_capture_queue(self) -> Union[bool, None]:
|
||||
'''Process a query from the capture queue'''
|
||||
if not self.redis.exists('to_capture'):
|
||||
redis = self.redis # use a single connection
|
||||
if not redis.exists('to_capture'):
|
||||
return None
|
||||
|
||||
status, message = self.splash_status()
|
||||
|
@ -692,18 +694,18 @@ class Lookyloo():
|
|||
self.logger.critical(f'Splash is not running, unable to process the capture queue: {message}')
|
||||
return None
|
||||
|
||||
value = self.redis.zpopmax('to_capture')
|
||||
value = redis.zpopmax('to_capture')
|
||||
if not value or not value[0]:
|
||||
return None
|
||||
uuid, score = value[0]
|
||||
queue: str = self.redis.get(f'{uuid}_mgmt')
|
||||
self.redis.sadd('ongoing', uuid)
|
||||
queue: str = redis.get(f'{uuid}_mgmt')
|
||||
redis.sadd('ongoing', uuid)
|
||||
|
||||
lazy_cleanup = self.redis.pipeline()
|
||||
lazy_cleanup = redis.pipeline()
|
||||
lazy_cleanup.delete(f'{uuid}_mgmt')
|
||||
lazy_cleanup.zincrby('queues', -1, queue)
|
||||
|
||||
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
|
||||
to_capture: Dict[str, Union[str, int, float]] = redis.hgetall(uuid)
|
||||
to_capture['perma_uuid'] = uuid
|
||||
if 'cookies' in to_capture:
|
||||
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
||||
|
|
Loading…
Reference in New Issue