mirror of https://github.com/CIRCL/lookyloo
fix: Avoid race condition when re-enqueuing
parent
5634eaa27a
commit
416ca7224e
|
@ -1,6 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import time
|
||||||
import logging
|
import logging
|
||||||
import logging.config
|
import logging.config
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
|
@ -73,39 +74,59 @@ class Processing(AbstractManager):
|
||||||
def _retry_failed_enqueue(self):
|
def _retry_failed_enqueue(self):
|
||||||
'''If enqueuing failed, the settings are saved under a UUID, and that UUID is added to the 'to_capture' key.'''
|
'''If enqueuing failed, the settings are saved under a UUID, and that UUID is added to the 'to_capture' key.'''
|
||||||
for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
|
for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
|
||||||
if (self.lookyloo.redis.hexists(uuid, 'not_queued')
|
try_reenqueue = False
|
||||||
or self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]):
|
if self.lookyloo.redis.hexists(uuid, 'not_queued'):
|
||||||
self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.')
|
# The capture is marked as not queued
|
||||||
# This capture couldn't be queued and we created the uuid locally
|
try_reenqueue = True
|
||||||
query = self.lookyloo.redis.hgetall(uuid)
|
elif self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
|
||||||
try:
|
# The capture is unknown on lacus side. It might be a race condition.
|
||||||
self.lookyloo.lacus.enqueue(
|
# Let's retry a few times.
|
||||||
url=query.get('url', None),
|
retry = 3
|
||||||
document_name=query.get('document_name', None),
|
while retry > 0:
|
||||||
document=query.get('document', None),
|
if self.lookyloo.lacus.get_capture_status(uuid) not in [CaptureStatusPy.UNKNOWN, CaptureStatusCore.UNKNOWN]:
|
||||||
# depth=query.get('depth', 0),
|
# Was a race condition, the UUID is being processed by Lacus
|
||||||
browser=query.get('browser', None),
|
break
|
||||||
device_name=query.get('device_name', None),
|
retry -= 1
|
||||||
user_agent=query.get('user_agent', None),
|
time.sleep(3)
|
||||||
proxy=query.get('proxy', None),
|
|
||||||
general_timeout_in_sec=query.get('general_timeout_in_sec', None),
|
|
||||||
cookies=query.get('cookies', None),
|
|
||||||
headers=query.get('headers', None),
|
|
||||||
http_credentials=query.get('http_credentials', None),
|
|
||||||
viewport=query.get('viewport', None),
|
|
||||||
referer=query.get('referer', None),
|
|
||||||
rendered_hostname_only=query.get('rendered_hostname_only', True),
|
|
||||||
# force=query.get('force', False),
|
|
||||||
# recapture_interval=query.get('recapture_interval', 300),
|
|
||||||
priority=query.get('priority', None),
|
|
||||||
uuid=uuid
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f'Still unable to enqueue capture: {e}')
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
self.lookyloo.redis.hdel(uuid, 'not_queued')
|
# UUID is still unknown
|
||||||
self.logger.info(f'{uuid} enqueued.')
|
try_reenqueue = True
|
||||||
|
if not try_reenqueue:
|
||||||
|
continue
|
||||||
|
self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.')
|
||||||
|
# This capture couldn't be queued and we created the uuid locally
|
||||||
|
query = self.lookyloo.redis.hgetall(uuid)
|
||||||
|
try:
|
||||||
|
new_uuid = self.lookyloo.lacus.enqueue(
|
||||||
|
url=query.get('url', None),
|
||||||
|
document_name=query.get('document_name', None),
|
||||||
|
document=query.get('document', None),
|
||||||
|
# depth=query.get('depth', 0),
|
||||||
|
browser=query.get('browser', None),
|
||||||
|
device_name=query.get('device_name', None),
|
||||||
|
user_agent=query.get('user_agent', None),
|
||||||
|
proxy=query.get('proxy', None),
|
||||||
|
general_timeout_in_sec=query.get('general_timeout_in_sec', None),
|
||||||
|
cookies=query.get('cookies', None),
|
||||||
|
headers=query.get('headers', None),
|
||||||
|
http_credentials=query.get('http_credentials', None),
|
||||||
|
viewport=query.get('viewport', None),
|
||||||
|
referer=query.get('referer', None),
|
||||||
|
rendered_hostname_only=query.get('rendered_hostname_only', True),
|
||||||
|
# force=query.get('force', False),
|
||||||
|
# recapture_interval=query.get('recapture_interval', 300),
|
||||||
|
priority=query.get('priority', None),
|
||||||
|
uuid=uuid
|
||||||
|
)
|
||||||
|
if new_uuid != uuid:
|
||||||
|
# Somehow, between the status check and the enqueue call, the UUID stopped being UNKNOWN, so a different UUID came back; just log it.
|
||||||
|
self.logger.warning(f'Had to change the capture UUID (duplicate). Old: {uuid} / New: {new_uuid}')
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f'Still unable to enqueue capture: {e}')
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self.lookyloo.redis.hdel(uuid, 'not_queued')
|
||||||
|
self.logger.info(f'{uuid} enqueued.')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
Loading…
Reference in New Issue