From 36d39f6076b4f31f098abe2e1619b9b9613e8831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Sun, 26 Feb 2023 17:20:17 +0100 Subject: [PATCH] new: Add PID in lock file, allows to check if the locking process is still there --- bin/background_indexer.py | 15 +++----------- lookyloo/capturecache.py | 6 +++--- lookyloo/helpers.py | 41 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/bin/background_indexer.py b/bin/background_indexer.py index ef945646..0db91315 100755 --- a/bin/background_indexer.py +++ b/bin/background_indexer.py @@ -3,12 +3,12 @@ import logging import logging.config import os -from datetime import datetime, timedelta import shutil from lookyloo.default import AbstractManager, get_config from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.lookyloo import Lookyloo +from lookyloo.helpers import is_locked logging.config.dictConfig(get_config('logging')) @@ -35,17 +35,8 @@ class BackgroundIndexer(AbstractManager): or not list(uuid_path.parent.rglob('*.har')) or not list(uuid_path.parent.rglob('*.har.gz'))): continue - lock_file = uuid_path.parent / 'lock' - if lock_file.exists(): - try: - with lock_file.open('r') as f: - lock_ts = datetime.fromisoformat(f.read()) - if lock_ts < datetime.now() - timedelta(minutes=5): - # Clear old locks. They shouldn't be there, but it's gonna happen. 
def _pid_is_running(pid: int) -> bool:
    """Tell whether a process with the given PID currently exists.

    Uses the POSIX "signal 0" probe: nothing is delivered, only the
    existence/permission checks are performed.

    :param pid: PID to probe, must be strictly positive.
    :return: True if the process exists, False if it does not.
    :raises ValueError: If the PID is zero or negative.
    """
    if pid <= 0:
        # 0 and negative values address process groups, not a single process.
        raise ValueError(f'Invalid PID: {pid}')
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists, but belongs to someone else: it is alive.
        return True
    return True


def is_locked(locked_dir_path: Path, /, *, max_age: timedelta = timedelta(minutes=30)) -> bool:
    """Check if a capture directory is locked, if the lock is recent enough,
    and if the locking process is still running.

    The lock is a file called ``lock`` in the directory. Its content is either
    ``<ISO timestamp>;<PID>`` or only ``<ISO timestamp>`` (old format).
    An empty lock file is considered as being written right now by its owner:
    its modification time is used as timestamp and no PID check is done.

    Side effect: a lock held by a dead process, or older than ``max_age``,
    is removed from the directory.

    :param locked_dir_path: Path of the directory.
    :param max_age: Age after which a lock is considered stale and removed.
    :return: True if a valid lock is in place. False if there is no lock,
             if the lock was stale and got removed, or if its content
             cannot be processed.
    """
    lock_file = locked_dir_path / 'lock'

    try:
        # Read directly instead of checking for existence first: the lock can
        # be removed by another process at any time.
        content = lock_file.read_text().strip()
        if content:
            # partition never fails: pid is empty with the old format.
            ts, _, pid = content.partition(';')
            if pid and not _pid_is_running(int(pid)):
                logger.info(f'Lock by dead script {lock_file}, removing it.')
                lock_file.unlink(missing_ok=True)
                return False
            lock_ts = datetime.fromisoformat(ts)
        else:
            # The file was created, but the owner didn't write into it yet.
            lock_ts = datetime.fromtimestamp(lock_file.stat().st_mtime)

        if lock_ts < datetime.now() - max_age:
            # Clear old locks. They shouldn't be there, but it's gonna happen.
            logger.info(f'Old lock ({lock_ts.isoformat()}) {lock_file}, removing it.')
            lock_file.unlink(missing_ok=True)
            return False
    except FileNotFoundError:
        # No lock file, or it was released while we were looking at it.
        return False
    except Exception as e:
        # Best effort: a lock we cannot make sense of doesn't block the caller.
        logger.critical(f'Lock found, but unable to process it: {e}.')
        return False

    # The lockfile is here for a good reason.
    return True