fix: Few more improvements on lockfile and broken captures.

pull/746/head
Raphaël Vinot 2023-07-25 20:16:48 +02:00
parent 484aec5ddd
commit ef3432cbed
3 changed files with 23 additions and 10 deletions

View File

@ -38,10 +38,6 @@ class BackgroundIndexer(AbstractManager):
# We already have a pickle file
self.logger.debug(f'{uuid_path.parent} has a pickle.')
continue
elif not list(uuid_path.parent.rglob('*.har.gz')) and not list(uuid_path.parent.rglob('*.har')):
# No HAR file
self.logger.info(f'{uuid_path.parent} has no HAR file.')
continue
if is_locked(uuid_path.parent):
# it is really locked
@ -50,6 +46,14 @@ class BackgroundIndexer(AbstractManager):
with uuid_path.open() as f:
uuid = f.read()
if not list(uuid_path.parent.rglob('*.har.gz')) and not list(uuid_path.parent.rglob('*.har')):
# No HAR file
self.logger.warning(f'{uuid_path.parent} has no HAR file.')
self.lookyloo.redis.hdel('lookup_dirs', uuid)
shutil.move(str(uuid_path.parent), str(self.discarded_captures_dir / uuid_path.parent.name))
continue
if not self.lookyloo.redis.hexists('lookup_dirs', uuid):
# The capture with this UUID exists, but it is for some reason missing in lookup_dirs
self.lookyloo.redis.hset('lookup_dirs', uuid, str(uuid_path.parent))

View File

@ -273,7 +273,11 @@ class CapturesIndex(Mapping):
# The pickle is being created somewhere else, wait until it's done.
while is_locked(capture_dir):
time.sleep(5)
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime, logger)
try:
return load_pickle_tree(capture_dir, capture_dir.stat().st_mtime, logger)
except TreeNeedsRebuild:
# If this exception is raised, the building failed somewhere else, let's give it another shot.
pass
if not (har_files := sorted(capture_dir.glob('*.har'))):
har_files = sorted(capture_dir.glob('*.har.gz'))

View File

@ -226,7 +226,6 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[Union[s
def is_locked(locked_dir_path: Path, /) -> bool:
"""Check if a capture directory is locked, if the lock is recent enough,
and if the locking process is still running.
Note: if the lock has been set by the same process, we ignore it.
:param locked_dir_path: Path of the directory.
"""
@ -237,13 +236,19 @@ def is_locked(locked_dir_path: Path, /) -> bool:
try:
content = ''
while not content:
max_wait_content = 5
while max_wait_content > 0:
with lock_file.open('r') as f:
if content := f.read():
break
# The file is empty, we're between the creation and setting the content
logger.info(f'Lock file empty ({lock_file}), waiting...')
time.sleep(1)
# The file is empty, we're between the creation and setting the content
logger.info(f'Lock file empty ({lock_file}), waiting...')
max_wait_content -= 1
time.sleep(1)
else:
logger.warning('Lock file empty for too long, removing it.')
lock_file.unlink(missing_ok=True)
return False
ts, pid = content.split(';')
try: