fix: indexer getting stuck when we had more than one at a time

pull/746/head
Raphaël Vinot 2023-07-25 17:08:00 +02:00
parent 92260b5a0a
commit 0c7b3d9106
2 changed files with 31 additions and 16 deletions

View File

@ -5,12 +5,13 @@ import logging.config
import os import os
import shutil import shutil
from datetime import datetime
from typing import Optional from typing import Optional
from lookyloo.default import AbstractManager, get_config from lookyloo.default import AbstractManager, get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.lookyloo import Lookyloo from lookyloo.lookyloo import Lookyloo
from lookyloo.helpers import is_locked from lookyloo.helpers import is_locked, try_make_file
logging.config.dictConfig(get_config('logging')) logging.config.dictConfig(get_config('logging'))
@ -38,7 +39,13 @@ class BackgroundIndexer(AbstractManager):
or not list(uuid_path.parent.rglob('*.har.gz'))): or not list(uuid_path.parent.rglob('*.har.gz'))):
continue continue
if is_locked(uuid_path.parent): lock_file = uuid_path.parent / 'lock'
if try_make_file(lock_file):
# Lock created, we can process
with lock_file.open('w') as f:
f.write(f"{datetime.now().isoformat()};{os.getpid()}")
elif is_locked(uuid_path.parent):
# it is really locked
continue continue
with uuid_path.open() as f: with uuid_path.open() as f:

View File

@ -3,6 +3,7 @@ import hashlib
import json import json
import logging import logging
import os import os
import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import lru_cache from functools import lru_cache
@ -225,6 +226,7 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[Union[s
def is_locked(locked_dir_path: Path, /) -> bool: def is_locked(locked_dir_path: Path, /) -> bool:
"""Check if a capture directory is locked, if the lock is recent enough, """Check if a capture directory is locked, if the lock is recent enough,
and if the locking process is still running. and if the locking process is still running.
Note: if the lock has been set by the same process, we ignore it.
:param locked_dir_path: Path of the directory. :param locked_dir_path: Path of the directory.
""" """
@ -234,19 +236,25 @@ def is_locked(locked_dir_path: Path, /) -> bool:
return False return False
try: try:
content = ''
while not content:
with lock_file.open('r') as f: with lock_file.open('r') as f:
content = f.read() if content := f.read():
if ';' in content: break
# The file is empty: it has been created but its content has not been written yet
logger.info(f'Lock file empty ({lock_file}), waiting...')
time.sleep(1)
ts, pid = content.split(';') ts, pid = content.split(';')
if pid == str(os.getpid()):
# Locked by the same PID: the lock was set by this indexer process itself.
return False
try: try:
os.kill(int(pid), 0) os.kill(int(pid), 0)
except OSError: except OSError:
logger.info(f'Lock by dead script {lock_file}, removing it.') logger.info(f'Lock by dead script {lock_file}, removing it.')
lock_file.unlink(missing_ok=True) lock_file.unlink(missing_ok=True)
return False return False
else:
# Old format
ts = content
lock_ts = datetime.fromisoformat(ts) lock_ts = datetime.fromisoformat(ts)
if lock_ts < datetime.now() - timedelta(minutes=30): if lock_ts < datetime.now() - timedelta(minutes=30):
@ -255,7 +263,7 @@ def is_locked(locked_dir_path: Path, /) -> bool:
lock_file.unlink(missing_ok=True) lock_file.unlink(missing_ok=True)
return False return False
except Exception as e: except Exception as e:
logger.critical(f'Lock found, but uanble to open it: {e}.') logger.critical(f'Lock found, but unable to process it: {e}.')
return False return False
# The lockfile is here for a good reason. # The lockfile is here for a good reason.