From 0c7b3d9106a407805e9b663136e9aa31c6ac186f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 25 Jul 2023 17:08:00 +0200 Subject: [PATCH] fix: indexer getting stuck when we had more than one at a time --- bin/background_indexer.py | 11 +++++++++-- lookyloo/helpers.py | 36 ++++++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/bin/background_indexer.py b/bin/background_indexer.py index 10d7c66b..e769d180 100755 --- a/bin/background_indexer.py +++ b/bin/background_indexer.py @@ -5,12 +5,13 @@ import logging.config import os import shutil +from datetime import datetime from typing import Optional from lookyloo.default import AbstractManager, get_config from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.lookyloo import Lookyloo -from lookyloo.helpers import is_locked +from lookyloo.helpers import is_locked, try_make_file logging.config.dictConfig(get_config('logging')) @@ -38,7 +39,13 @@ class BackgroundIndexer(AbstractManager): or not list(uuid_path.parent.rglob('*.har.gz'))): continue - if is_locked(uuid_path.parent): + lock_file = uuid_path.parent / 'lock' + if try_make_file(lock_file): + # Lock created, we can process + with lock_file.open('w') as f: + f.write(f"{datetime.now().isoformat()};{os.getpid()}") + elif is_locked(uuid_path.parent): + # it is really locked continue with uuid_path.open() as f: diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index 6e9b8025..2dcff753 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -3,6 +3,7 @@ import hashlib import json import logging import os +import time from datetime import datetime, timedelta from functools import lru_cache @@ -225,6 +226,7 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[Union[s def is_locked(locked_dir_path: Path, /) -> bool: """Check if a capture directory is locked, if the lock is recent enough, and if the locking process is still running. + Note: if the lock has been set by the same process, we ignore it. :param locked_dir_path: Path of the directory. """ @@ -234,19 +236,25 @@ def is_locked(locked_dir_path: Path, /) -> bool: return False try: - with lock_file.open('r') as f: - content = f.read() - if ';' in content: - ts, pid = content.split(';') - try: - os.kill(int(pid), 0) - except OSError: - logger.info(f'Lock by dead script {lock_file}, removing it.') - lock_file.unlink(missing_ok=True) - return False - else: - # Old format - ts = content + content = '' + while not content: + with lock_file.open('r') as f: + if content := f.read(): + break + # The file is empty, we're between the creation and setting the content + logger.info(f'Lock file empty ({lock_file}), waiting...') + time.sleep(1) + + ts, pid = content.split(';') + if pid == str(os.getpid()): + # Locked by the same PID, was locked by the indexer. + return False + try: + os.kill(int(pid), 0) + except OSError: + logger.info(f'Lock by dead script {lock_file}, removing it.') + lock_file.unlink(missing_ok=True) + return False lock_ts = datetime.fromisoformat(ts) if lock_ts < datetime.now() - timedelta(minutes=30): @@ -255,7 +263,7 @@ def is_locked(locked_dir_path: Path, /) -> bool: lock_file.unlink(missing_ok=True) return False except Exception as e: - logger.critical(f'Lock found, but uanble to open it: {e}.') + logger.critical(f'Lock found, but unable process it: {e}.') return False # The lockfile is here for a good reason.