fix: Avoid race condition when re-enqueuing

pull/773/head
Raphaël Vinot 2023-09-01 16:00:45 +02:00
parent 5634eaa27a
commit 416ca7224e
1 changed file with 53 additions and 32 deletions

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import json import json
import time
import logging import logging
import logging.config import logging.config
from collections import Counter from collections import Counter
@ -73,13 +74,30 @@ class Processing(AbstractManager):
def _retry_failed_enqueue(self): def _retry_failed_enqueue(self):
'''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID''' '''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID'''
for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'): for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
if (self.lookyloo.redis.hexists(uuid, 'not_queued') try_reenqueue = False
or self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]): if self.lookyloo.redis.hexists(uuid, 'not_queued'):
# The capture is marked as not queued
try_reenqueue = True
elif self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
# The capture is unknown on lacus side. It might be a race condition.
# Let's retry a few times.
retry = 3
while retry > 0:
if self.lookyloo.lacus.get_capture_status(uuid) not in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
# Was a race condition, the UUID is being processed by Lacus
break
retry -= 1
time.sleep(3)
else:
# UUID is still unknown
try_reenqueue = True
if not try_reenqueue:
continue
self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.') self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.')
# This capture couldn't be queued and we created the uuid locally # This capture couldn't be queued and we created the uuid locally
query = self.lookyloo.redis.hgetall(uuid) query = self.lookyloo.redis.hgetall(uuid)
try: try:
self.lookyloo.lacus.enqueue( new_uuid = self.lookyloo.lacus.enqueue(
url=query.get('url', None), url=query.get('url', None),
document_name=query.get('document_name', None), document_name=query.get('document_name', None),
document=query.get('document', None), document=query.get('document', None),
@ -100,6 +118,9 @@ class Processing(AbstractManager):
priority=query.get('priority', None), priority=query.get('priority', None),
uuid=uuid uuid=uuid
) )
if new_uuid != uuid:
# somehow, between the check and queuing, the UUID isn't UNKNOWN anymore, just checking that
self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}')
except Exception as e: except Exception as e:
self.logger.warning(f'Still unable to enqueue capture: {e}') self.logger.warning(f'Still unable to enqueue capture: {e}')
break break