fix: indexer getting stuck when we had more than one at a time

pull/746/head
Raphaël Vinot 2023-07-25 17:08:00 +02:00
parent 92260b5a0a
commit 0c7b3d9106
2 changed files with 31 additions and 16 deletions

View File

@ -5,12 +5,13 @@ import logging.config
import os
import shutil
from datetime import datetime
from typing import Optional
from lookyloo.default import AbstractManager, get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.lookyloo import Lookyloo
from lookyloo.helpers import is_locked
from lookyloo.helpers import is_locked, try_make_file
logging.config.dictConfig(get_config('logging'))
@ -38,7 +39,13 @@ class BackgroundIndexer(AbstractManager):
or not list(uuid_path.parent.rglob('*.har.gz'))):
continue
if is_locked(uuid_path.parent):
lock_file = uuid_path.parent / 'lock'
if try_make_file(lock_file):
# Lock created, we can process
with lock_file.open('w') as f:
f.write(f"{datetime.now().isoformat()};{os.getpid()}")
elif is_locked(uuid_path.parent):
# it is really locked
continue
with uuid_path.open() as f:

View File

@ -3,6 +3,7 @@ import hashlib
import json
import logging
import os
import time
from datetime import datetime, timedelta
from functools import lru_cache
@ -225,6 +226,7 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[Union[s
def is_locked(locked_dir_path: Path, /) -> bool:
"""Check if a capture directory is locked, if the lock is recent enough,
and if the locking process is still running.
Note: if the lock has been set by the same process, we ignore it.
:param locked_dir_path: Path of the directory.
"""
@ -234,19 +236,25 @@ def is_locked(locked_dir_path: Path, /) -> bool:
return False
try:
with lock_file.open('r') as f:
content = f.read()
if ';' in content:
ts, pid = content.split(';')
try:
os.kill(int(pid), 0)
except OSError:
logger.info(f'Lock by dead script {lock_file}, removing it.')
lock_file.unlink(missing_ok=True)
return False
else:
# Old format
ts = content
content = ''
while not content:
with lock_file.open('r') as f:
if content := f.read():
break
# The file is empty, we're between the creation and setting the content
logger.info(f'Lock file empty ({lock_file}), waiting...')
time.sleep(1)
ts, pid = content.split(';')
if pid == str(os.getpid()):
# Locked by the same PID, was locked by the indexer.
return False
try:
os.kill(int(pid), 0)
except OSError:
logger.info(f'Lock by dead script {lock_file}, removing it.')
lock_file.unlink(missing_ok=True)
return False
lock_ts = datetime.fromisoformat(ts)
if lock_ts < datetime.now() - timedelta(minutes=30):
@ -255,7 +263,7 @@ def is_locked(locked_dir_path: Path, /) -> bool:
lock_file.unlink(missing_ok=True)
return False
except Exception as e:
logger.critical(f'Lock found, but uanble to open it: {e}.')
logger.critical(f'Lock found, but unable process it: {e}.')
return False
# The lockfile is here for a good reason.