fix: indexer getting stuck when we had more than one at a time

pull/746/head
Raphaël Vinot 2023-07-25 17:08:00 +02:00
parent 92260b5a0a
commit 0c7b3d9106
2 changed files with 31 additions and 16 deletions

View File

@ -5,12 +5,13 @@ import logging.config
import os import os
import shutil import shutil
from datetime import datetime
from typing import Optional from typing import Optional
from lookyloo.default import AbstractManager, get_config from lookyloo.default import AbstractManager, get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.lookyloo import Lookyloo from lookyloo.lookyloo import Lookyloo
from lookyloo.helpers import is_locked from lookyloo.helpers import is_locked, try_make_file
logging.config.dictConfig(get_config('logging')) logging.config.dictConfig(get_config('logging'))
@ -38,7 +39,13 @@ class BackgroundIndexer(AbstractManager):
or not list(uuid_path.parent.rglob('*.har.gz'))): or not list(uuid_path.parent.rglob('*.har.gz'))):
continue continue
if is_locked(uuid_path.parent): lock_file = uuid_path.parent / 'lock'
if try_make_file(lock_file):
# Lock created, we can process
with lock_file.open('w') as f:
f.write(f"{datetime.now().isoformat()};{os.getpid()}")
elif is_locked(uuid_path.parent):
# it is really locked
continue continue
with uuid_path.open() as f: with uuid_path.open() as f:

View File

@ -3,6 +3,7 @@ import hashlib
import json import json
import logging import logging
import os import os
import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from functools import lru_cache from functools import lru_cache
@ -225,6 +226,7 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[Union[s
def is_locked(locked_dir_path: Path, /) -> bool: def is_locked(locked_dir_path: Path, /) -> bool:
"""Check if a capture directory is locked, if the lock is recent enough, """Check if a capture directory is locked, if the lock is recent enough,
and if the locking process is still running. and if the locking process is still running.
Note: if the lock has been set by the same process, we ignore it.
:param locked_dir_path: Path of the directory. :param locked_dir_path: Path of the directory.
""" """
@ -234,19 +236,25 @@ def is_locked(locked_dir_path: Path, /) -> bool:
return False return False
try: try:
content = ''
while not content:
with lock_file.open('r') as f: with lock_file.open('r') as f:
content = f.read() if content := f.read():
if ';' in content: break
# The file is empty: it has been created but its content has not been written yet
logger.info(f'Lock file empty ({lock_file}), waiting...')
time.sleep(1)
ts, pid = content.split(';') ts, pid = content.split(';')
if pid == str(os.getpid()):
# Locked by the same PID: the lock was set by this indexer process itself.
return False
try: try:
os.kill(int(pid), 0) os.kill(int(pid), 0)
except OSError: except OSError:
logger.info(f'Lock by dead script {lock_file}, removing it.') logger.info(f'Lock by dead script {lock_file}, removing it.')
lock_file.unlink(missing_ok=True) lock_file.unlink(missing_ok=True)
return False return False
else:
# Old format
ts = content
lock_ts = datetime.fromisoformat(ts) lock_ts = datetime.fromisoformat(ts)
if lock_ts < datetime.now() - timedelta(minutes=30): if lock_ts < datetime.now() - timedelta(minutes=30):
@ -255,7 +263,7 @@ def is_locked(locked_dir_path: Path, /) -> bool:
lock_file.unlink(missing_ok=True) lock_file.unlink(missing_ok=True)
return False return False
except Exception as e: except Exception as e:
logger.critical(f'Lock found, but uanble to open it: {e}.') logger.critical(f'Lock found, but unable to process it: {e}.')
return False return False
# The lockfile is here for a good reason. # The lockfile is here for a good reason.